summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/abi/linux/ioctl.go2
-rw-r--r--pkg/coverage/coverage.go39
-rw-r--r--pkg/sentry/devices/memdev/full.go4
-rw-r--r--pkg/sentry/devices/memdev/null.go4
-rw-r--r--pkg/sentry/devices/memdev/random.go4
-rw-r--r--pkg/sentry/devices/memdev/zero.go4
-rw-r--r--pkg/sentry/devices/ttydev/ttydev.go2
-rw-r--r--pkg/sentry/fsimpl/devpts/devpts.go8
-rw-r--r--pkg/sentry/fsimpl/devpts/master.go6
-rw-r--r--pkg/sentry/fsimpl/devpts/replica.go97
-rw-r--r--pkg/sentry/fsimpl/devtmpfs/devtmpfs.go4
-rw-r--r--pkg/sentry/fsimpl/eventfd/eventfd.go2
-rw-r--r--pkg/sentry/fsimpl/ext/dentry.go2
-rw-r--r--pkg/sentry/fsimpl/ext/directory.go8
-rw-r--r--pkg/sentry/fsimpl/ext/ext.go2
-rw-r--r--pkg/sentry/fsimpl/ext/extent_file.go2
-rw-r--r--pkg/sentry/fsimpl/ext/filesystem.go4
-rw-r--r--pkg/sentry/fsimpl/ext/inode.go2
-rw-r--r--pkg/sentry/fsimpl/ext/regular_file.go6
-rw-r--r--pkg/sentry/fsimpl/ext/symlink.go4
-rw-r--r--pkg/sentry/fsimpl/fuse/connection.go25
-rw-r--r--pkg/sentry/fsimpl/fuse/dev.go16
-rw-r--r--pkg/sentry/fsimpl/fuse/fusefs.go55
-rw-r--r--pkg/sentry/fsimpl/gofer/directory.go3
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go18
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go39
-rw-r--r--pkg/sentry/fsimpl/gofer/handle.go2
-rw-r--r--pkg/sentry/fsimpl/gofer/regular_file.go7
-rw-r--r--pkg/sentry/fsimpl/gofer/socket.go4
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go6
-rw-r--r--pkg/sentry/fsimpl/host/host.go12
-rw-r--r--pkg/sentry/fsimpl/host/mmap.go6
-rw-r--r--pkg/sentry/fsimpl/host/tty.go2
-rw-r--r--pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go8
-rw-r--r--pkg/sentry/fsimpl/kernfs/fd_impl_util.go11
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go142
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go72
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs.go59
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs_test.go20
-rw-r--r--pkg/sentry/fsimpl/kernfs/symlink.go2
-rw-r--r--pkg/sentry/fsimpl/kernfs/synthetic_directory.go14
-rw-r--r--pkg/sentry/fsimpl/overlay/directory.go7
-rw-r--r--pkg/sentry/fsimpl/overlay/filesystem.go50
-rw-r--r--pkg/sentry/fsimpl/overlay/non_directory.go6
-rw-r--r--pkg/sentry/fsimpl/overlay/overlay.go86
-rw-r--r--pkg/sentry/fsimpl/pipefs/pipefs.go4
-rw-r--r--pkg/sentry/fsimpl/proc/filesystem.go7
-rw-r--r--pkg/sentry/fsimpl/proc/subtasks.go1
-rw-r--r--pkg/sentry/fsimpl/proc/task.go2
-rw-r--r--pkg/sentry/fsimpl/proc/task_fds.go1
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go5
-rw-r--r--pkg/sentry/fsimpl/proc/task_net.go1
-rw-r--r--pkg/sentry/fsimpl/proc/tasks.go2
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_files.go6
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys.go1
-rw-r--r--pkg/sentry/fsimpl/signalfd/signalfd.go4
-rw-r--r--pkg/sentry/fsimpl/sockfs/sockfs.go5
-rw-r--r--pkg/sentry/fsimpl/sys/kcov.go3
-rw-r--r--pkg/sentry/fsimpl/sys/sys.go9
-rw-r--r--pkg/sentry/fsimpl/timerfd/timerfd.go2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/device_file.go1
-rw-r--r--pkg/sentry/fsimpl/tmpfs/directory.go4
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go18
-rw-r--r--pkg/sentry/fsimpl/tmpfs/named_pipe.go1
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file.go7
-rw-r--r--pkg/sentry/fsimpl/tmpfs/socket_file.go2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/symlink.go1
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go26
-rw-r--r--pkg/sentry/fsimpl/verity/verity.go14
-rw-r--r--pkg/sentry/kernel/kcov.go4
-rw-r--r--pkg/sentry/kernel/kernel.go13
-rw-r--r--pkg/sentry/socket/hostinet/socket_vfs2.go1
-rw-r--r--pkg/sentry/socket/netlink/provider_vfs2.go1
-rw-r--r--pkg/sentry/socket/unix/unix_vfs2.go3
-rw-r--r--pkg/sentry/state/state.go6
-rw-r--r--pkg/sentry/vfs/anonfs.go5
-rw-r--r--pkg/sentry/vfs/dentry.go2
-rw-r--r--pkg/sentry/vfs/device.go3
-rw-r--r--pkg/sentry/vfs/epoll.go9
-rw-r--r--pkg/sentry/vfs/file_description.go10
-rw-r--r--pkg/sentry/vfs/file_description_impl_util.go28
-rw-r--r--pkg/sentry/vfs/filesystem.go6
-rw-r--r--pkg/sentry/vfs/filesystem_type.go9
-rw-r--r--pkg/sentry/vfs/genericfstree/genericfstree.go2
-rw-r--r--pkg/sentry/vfs/inotify.go2
-rw-r--r--pkg/sentry/vfs/lock.go2
-rw-r--r--pkg/sentry/vfs/mount.go9
-rw-r--r--pkg/sentry/vfs/mount_test.go26
-rw-r--r--pkg/sentry/vfs/mount_unsafe.go11
-rw-r--r--pkg/sentry/vfs/options.go36
-rw-r--r--pkg/sentry/vfs/permissions.go2
-rw-r--r--pkg/sentry/vfs/resolving_path.go5
-rw-r--r--pkg/sentry/vfs/vfs.go2
-rw-r--r--pkg/state/types.go14
-rw-r--r--pkg/tcpip/adapters/gonet/gonet_test.go4
-rw-r--r--pkg/tcpip/checker/checker.go60
-rw-r--r--pkg/tcpip/faketime/faketime.go20
-rw-r--r--pkg/tcpip/network/BUILD1
-rw-r--r--pkg/tcpip/network/arp/arp.go104
-rw-r--r--pkg/tcpip/network/arp/arp_test.go6
-rw-r--r--pkg/tcpip/network/fragmentation/BUILD1
-rw-r--r--pkg/tcpip/network/fragmentation/fragmentation.go48
-rw-r--r--pkg/tcpip/network/fragmentation/fragmentation_test.go136
-rw-r--r--pkg/tcpip/network/fragmentation/reassembler.go12
-rw-r--r--pkg/tcpip/network/fragmentation/reassembler_test.go4
-rw-r--r--pkg/tcpip/network/ip_test.go329
-rw-r--r--pkg/tcpip/network/ipv4/BUILD1
-rw-r--r--pkg/tcpip/network/ipv4/icmp.go14
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go295
-rw-r--r--pkg/tcpip/network/ipv4/ipv4_test.go14
-rw-r--r--pkg/tcpip/network/ipv6/BUILD3
-rw-r--r--pkg/tcpip/network/ipv6/dhcpv6configurationfromndpra_string.go (renamed from pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go)2
-rw-r--r--pkg/tcpip/network/ipv6/icmp.go78
-rw-r--r--pkg/tcpip/network/ipv6/icmp_test.go66
-rw-r--r--pkg/tcpip/network/ipv6/ipv6.go792
-rw-r--r--pkg/tcpip/network/ipv6/ipv6_test.go63
-rw-r--r--pkg/tcpip/network/ipv6/ndp.go (renamed from pkg/tcpip/stack/ndp.go)576
-rw-r--r--pkg/tcpip/network/ipv6/ndp_test.go103
-rw-r--r--pkg/tcpip/sample/tun_tcp_connect/main.go4
-rw-r--r--pkg/tcpip/sample/tun_tcp_echo/main.go4
-rw-r--r--pkg/tcpip/stack/BUILD5
-rw-r--r--pkg/tcpip/stack/addressable_endpoint_state.go717
-rw-r--r--pkg/tcpip/stack/addressable_endpoint_state_test.go72
-rw-r--r--pkg/tcpip/stack/forwarder_test.go61
-rw-r--r--pkg/tcpip/stack/ndp_test.go819
-rw-r--r--pkg/tcpip/stack/neighbor_entry.go17
-rw-r--r--pkg/tcpip/stack/neighbor_entry_test.go20
-rw-r--r--pkg/tcpip/stack/nic.go1343
-rw-r--r--pkg/tcpip/stack/nic_test.go151
-rw-r--r--pkg/tcpip/stack/nud_test.go20
-rw-r--r--pkg/tcpip/stack/registration.go305
-rw-r--r--pkg/tcpip/stack/route.go119
-rw-r--r--pkg/tcpip/stack/stack.go368
-rw-r--r--pkg/tcpip/stack/stack_test.go296
-rw-r--r--pkg/tcpip/stack/transport_demuxer.go4
-rw-r--r--pkg/tcpip/stack/transport_demuxer_test.go8
-rw-r--r--pkg/tcpip/stack/transport_test.go45
-rw-r--r--pkg/tcpip/tcpip.go19
-rw-r--r--pkg/tcpip/tests/integration/loopback_test.go6
-rw-r--r--pkg/tcpip/tests/integration/multicast_broadcast_test.go22
-rw-r--r--pkg/tcpip/transport/icmp/protocol.go25
-rw-r--r--pkg/tcpip/transport/tcp/BUILD1
-rw-r--r--pkg/tcpip/transport/tcp/connect.go19
-rw-r--r--pkg/tcpip/transport/tcp/dual_stack_test.go16
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go174
-rw-r--r--pkg/tcpip/transport/tcp/endpoint_state.go4
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go20
-rw-r--r--pkg/tcpip/transport/tcp/rcv.go57
-rw-r--r--pkg/tcpip/transport/tcp/segment.go45
-rw-r--r--pkg/tcpip/transport/tcp/segment_queue.go52
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go887
-rw-r--r--pkg/tcpip/transport/tcp/tcp_timestamp_test.go19
-rw-r--r--pkg/tcpip/transport/tcp/testing/context/context.go89
-rw-r--r--pkg/tcpip/transport/udp/protocol.go20
-rw-r--r--pkg/tcpip/transport/udp/udp_test.go70
155 files changed, 6062 insertions, 3809 deletions
diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go
index 3356a2b4a..dc9ac7e7c 100644
--- a/pkg/abi/linux/ioctl.go
+++ b/pkg/abi/linux/ioctl.go
@@ -115,7 +115,7 @@ const (
// Constants from uapi/linux/fs.h.
const (
- FS_IOC_GETFLAGS = 2147771905
+ FS_IOC_GETFLAGS = 2148034049
FS_VERITY_FL = 1048576
)
diff --git a/pkg/coverage/coverage.go b/pkg/coverage/coverage.go
index 6831adcce..a4f4b2c5e 100644
--- a/pkg/coverage/coverage.go
+++ b/pkg/coverage/coverage.go
@@ -100,12 +100,9 @@ var coveragePool = sync.Pool{
// instrumentation_filter.
//
// Note that we "consume", i.e. clear, coverdata when this function is run, to
-// ensure that each event is only reported once.
-//
-// TODO(b/160639712): evaluate whether it is ok to reset the global coverage
-// data every time this function is run. We could technically have each thread
-// store a local snapshot against which we compare the most recent coverdata so
-// that separate threads do not affect each other's view of the data.
+// ensure that each event is only reported once. Due to the limitations of Go
+// coverage tools, we reset the global coverage data every time this function is
+// run.
func ConsumeCoverageData(w io.Writer) int {
once.Do(initCoverageData)
@@ -117,23 +114,23 @@ func ConsumeCoverageData(w io.Writer) int {
for fileIndex, file := range globalData.files {
counters := coverdata.Cover.Counters[file]
for index := 0; index < len(counters); index++ {
- val := atomic.SwapUint32(&counters[index], 0)
- if val != 0 {
- // Calculate the synthetic PC.
- pc := globalData.syntheticPCs[fileIndex][index]
-
- usermem.ByteOrder.PutUint64(pcBuffer[:], pc)
- n, err := w.Write(pcBuffer[:])
- if err != nil {
- if err == io.EOF {
- // Simply stop writing if we encounter EOF; it's ok if we attempted to
- // write more than we can hold.
- return total + n
- }
- panic(fmt.Sprintf("Internal error writing PCs to kcov area: %v", err))
+ if atomic.LoadUint32(&counters[index]) == 0 {
+ continue
+ }
+ // Non-zero coverage data found; consume it and report as a PC.
+ atomic.StoreUint32(&counters[index], 0)
+ pc := globalData.syntheticPCs[fileIndex][index]
+ usermem.ByteOrder.PutUint64(pcBuffer[:], pc)
+ n, err := w.Write(pcBuffer[:])
+ if err != nil {
+ if err == io.EOF {
+ // Simply stop writing if we encounter EOF; it's ok if we attempted to
+ // write more than we can hold.
+ return total + n
}
- total += n
+ panic(fmt.Sprintf("Internal error writing PCs to kcov area: %v", err))
}
+ total += n
}
}
diff --git a/pkg/sentry/devices/memdev/full.go b/pkg/sentry/devices/memdev/full.go
index 511179e31..fece3e762 100644
--- a/pkg/sentry/devices/memdev/full.go
+++ b/pkg/sentry/devices/memdev/full.go
@@ -24,6 +24,8 @@ import (
const fullDevMinor = 7
// fullDevice implements vfs.Device for /dev/full.
+//
+// +stateify savable
type fullDevice struct{}
// Open implements vfs.Device.Open.
@@ -38,6 +40,8 @@ func (fullDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, op
}
// fullFD implements vfs.FileDescriptionImpl for /dev/full.
+//
+// +stateify savable
type fullFD struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
diff --git a/pkg/sentry/devices/memdev/null.go b/pkg/sentry/devices/memdev/null.go
index 4918dbeeb..ff5837747 100644
--- a/pkg/sentry/devices/memdev/null.go
+++ b/pkg/sentry/devices/memdev/null.go
@@ -25,6 +25,8 @@ import (
const nullDevMinor = 3
// nullDevice implements vfs.Device for /dev/null.
+//
+// +stateify savable
type nullDevice struct{}
// Open implements vfs.Device.Open.
@@ -39,6 +41,8 @@ func (nullDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, op
}
// nullFD implements vfs.FileDescriptionImpl for /dev/null.
+//
+// +stateify savable
type nullFD struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
diff --git a/pkg/sentry/devices/memdev/random.go b/pkg/sentry/devices/memdev/random.go
index 5e7fe0280..ac943e3ba 100644
--- a/pkg/sentry/devices/memdev/random.go
+++ b/pkg/sentry/devices/memdev/random.go
@@ -30,6 +30,8 @@ const (
)
// randomDevice implements vfs.Device for /dev/random and /dev/urandom.
+//
+// +stateify savable
type randomDevice struct{}
// Open implements vfs.Device.Open.
@@ -44,6 +46,8 @@ func (randomDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry,
}
// randomFD implements vfs.FileDescriptionImpl for /dev/random.
+//
+// +stateify savable
type randomFD struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
diff --git a/pkg/sentry/devices/memdev/zero.go b/pkg/sentry/devices/memdev/zero.go
index 60cfea888..1929e41cd 100644
--- a/pkg/sentry/devices/memdev/zero.go
+++ b/pkg/sentry/devices/memdev/zero.go
@@ -27,6 +27,8 @@ import (
const zeroDevMinor = 5
// zeroDevice implements vfs.Device for /dev/zero.
+//
+// +stateify savable
type zeroDevice struct{}
// Open implements vfs.Device.Open.
@@ -41,6 +43,8 @@ func (zeroDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, op
}
// zeroFD implements vfs.FileDescriptionImpl for /dev/zero.
+//
+// +stateify savable
type zeroFD struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
diff --git a/pkg/sentry/devices/ttydev/ttydev.go b/pkg/sentry/devices/ttydev/ttydev.go
index 664e54498..a287c65ca 100644
--- a/pkg/sentry/devices/ttydev/ttydev.go
+++ b/pkg/sentry/devices/ttydev/ttydev.go
@@ -30,6 +30,8 @@ const (
)
// ttyDevice implements vfs.Device for /dev/tty.
+//
+// +stateify savable
type ttyDevice struct{}
// Open implements vfs.Device.Open.
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 77d1f493d..903135fae 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -35,6 +35,8 @@ import (
const Name = "devpts"
// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type FilesystemType struct{}
// Name implements vfs.FilesystemType.Name.
@@ -58,6 +60,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
return fs.Filesystem.VFSFilesystem(), root.VFSDentry(), nil
}
+// +stateify savable
type filesystem struct {
kernfs.Filesystem
@@ -110,6 +113,8 @@ func (fs *filesystem) Release(ctx context.Context) {
}
// rootInode is the root directory inode for the devpts mounts.
+//
+// +stateify savable
type rootInode struct {
implStatFS
kernfs.AlwaysValid
@@ -131,7 +136,7 @@ type rootInode struct {
root *rootInode
// mu protects the fields below.
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
// replicas maps pty ids to replica inodes.
replicas map[uint32]*replicaInode
@@ -242,6 +247,7 @@ func (i *rootInode) DecRef(context.Context) {
i.rootInodeRefs.DecRef(i.Destroy)
}
+// +stateify savable
type implStatFS struct{}
// StatFS implements kernfs.Inode.StatFS.
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index 98d4ffb22..69c2fe951 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -31,6 +31,8 @@ import (
)
// masterInode is the inode for the master end of the Terminal.
+//
+// +stateify savable
type masterInode struct {
implStatFS
kernfs.InodeAttrs
@@ -56,14 +58,12 @@ func (mi *masterInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernf
return nil, err
}
- mi.IncRef()
fd := &masterFileDescription{
inode: mi,
t: t,
}
fd.LockFD.Init(&mi.locks)
if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), d.VFSDentry(), &vfs.FileDescriptionOptions{}); err != nil {
- mi.DecRef(ctx)
return nil, err
}
return &fd.vfsfd, nil
@@ -89,6 +89,7 @@ func (mi *masterInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds
return mi.InodeAttrs.SetStat(ctx, vfsfs, creds, opts)
}
+// +stateify savable
type masterFileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
@@ -103,7 +104,6 @@ var _ vfs.FileDescriptionImpl = (*masterFileDescription)(nil)
// Release implements vfs.FileDescriptionImpl.Release.
func (mfd *masterFileDescription) Release(ctx context.Context) {
mfd.inode.root.masterClose(mfd.t)
- mfd.inode.DecRef(ctx)
}
// EventRegister implements waiter.Waitable.EventRegister.
diff --git a/pkg/sentry/fsimpl/devpts/replica.go b/pkg/sentry/fsimpl/devpts/replica.go
index 816bac80c..6515c5536 100644
--- a/pkg/sentry/fsimpl/devpts/replica.go
+++ b/pkg/sentry/fsimpl/devpts/replica.go
@@ -30,6 +30,8 @@ import (
)
// replicaInode is the inode for the replica end of the Terminal.
+//
+// +stateify savable
type replicaInode struct {
implStatFS
kernfs.InodeAttrs
@@ -52,14 +54,12 @@ type replicaInode struct {
var _ kernfs.Inode = (*replicaInode)(nil)
// Open implements kernfs.Inode.Open.
-func (si *replicaInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- si.IncRef()
+func (ri *replicaInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd := &replicaFileDescription{
- inode: si,
+ inode: ri,
}
- fd.LockFD.Init(&si.locks)
+ fd.LockFD.Init(&ri.locks)
if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), d.VFSDentry(), &vfs.FileDescriptionOptions{}); err != nil {
- si.DecRef(ctx)
return nil, err
}
return &fd.vfsfd, nil
@@ -67,34 +67,35 @@ func (si *replicaInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kern
}
// Valid implements kernfs.Inode.Valid.
-func (si *replicaInode) Valid(context.Context) bool {
+func (ri *replicaInode) Valid(context.Context) bool {
// Return valid if the replica still exists.
- si.root.mu.Lock()
- defer si.root.mu.Unlock()
- _, ok := si.root.replicas[si.t.n]
+ ri.root.mu.Lock()
+ defer ri.root.mu.Unlock()
+ _, ok := ri.root.replicas[ri.t.n]
return ok
}
// Stat implements kernfs.Inode.Stat.
-func (si *replicaInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
- statx, err := si.InodeAttrs.Stat(ctx, vfsfs, opts)
+func (ri *replicaInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ statx, err := ri.InodeAttrs.Stat(ctx, vfsfs, opts)
if err != nil {
return linux.Statx{}, err
}
statx.Blksize = 1024
statx.RdevMajor = linux.UNIX98_PTY_REPLICA_MAJOR
- statx.RdevMinor = si.t.n
+ statx.RdevMinor = ri.t.n
return statx, nil
}
// SetStat implements kernfs.Inode.SetStat
-func (si *replicaInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
+func (ri *replicaInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
if opts.Stat.Mask&linux.STATX_SIZE != 0 {
return syserror.EINVAL
}
- return si.InodeAttrs.SetStat(ctx, vfsfs, creds, opts)
+ return ri.InodeAttrs.SetStat(ctx, vfsfs, creds, opts)
}
+// +stateify savable
type replicaFileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
@@ -106,37 +107,35 @@ type replicaFileDescription struct {
var _ vfs.FileDescriptionImpl = (*replicaFileDescription)(nil)
// Release implements fs.FileOperations.Release.
-func (sfd *replicaFileDescription) Release(ctx context.Context) {
- sfd.inode.DecRef(ctx)
-}
+func (rfd *replicaFileDescription) Release(ctx context.Context) {}
// EventRegister implements waiter.Waitable.EventRegister.
-func (sfd *replicaFileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
- sfd.inode.t.ld.replicaWaiter.EventRegister(e, mask)
+func (rfd *replicaFileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+ rfd.inode.t.ld.replicaWaiter.EventRegister(e, mask)
}
// EventUnregister implements waiter.Waitable.EventUnregister.
-func (sfd *replicaFileDescription) EventUnregister(e *waiter.Entry) {
- sfd.inode.t.ld.replicaWaiter.EventUnregister(e)
+func (rfd *replicaFileDescription) EventUnregister(e *waiter.Entry) {
+ rfd.inode.t.ld.replicaWaiter.EventUnregister(e)
}
// Readiness implements waiter.Waitable.Readiness.
-func (sfd *replicaFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
- return sfd.inode.t.ld.replicaReadiness()
+func (rfd *replicaFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
+ return rfd.inode.t.ld.replicaReadiness()
}
// Read implements vfs.FileDescriptionImpl.Read.
-func (sfd *replicaFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) {
- return sfd.inode.t.ld.inputQueueRead(ctx, dst)
+func (rfd *replicaFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) {
+ return rfd.inode.t.ld.inputQueueRead(ctx, dst)
}
// Write implements vfs.FileDescriptionImpl.Write.
-func (sfd *replicaFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) {
- return sfd.inode.t.ld.outputQueueWrite(ctx, src)
+func (rfd *replicaFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) {
+ return rfd.inode.t.ld.outputQueueWrite(ctx, src)
}
// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
-func (sfd *replicaFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (rfd *replicaFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
t := kernel.TaskFromContext(ctx)
if t == nil {
// ioctl(2) may only be called from a task goroutine.
@@ -146,35 +145,35 @@ func (sfd *replicaFileDescription) Ioctl(ctx context.Context, io usermem.IO, arg
switch cmd := args[1].Uint(); cmd {
case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ
// Get the number of bytes in the input queue read buffer.
- return 0, sfd.inode.t.ld.inputQueueReadSize(t, io, args)
+ return 0, rfd.inode.t.ld.inputQueueReadSize(t, io, args)
case linux.TCGETS:
- return sfd.inode.t.ld.getTermios(t, args)
+ return rfd.inode.t.ld.getTermios(t, args)
case linux.TCSETS:
- return sfd.inode.t.ld.setTermios(t, args)
+ return rfd.inode.t.ld.setTermios(t, args)
case linux.TCSETSW:
// TODO(b/29356795): This should drain the output queue first.
- return sfd.inode.t.ld.setTermios(t, args)
+ return rfd.inode.t.ld.setTermios(t, args)
case linux.TIOCGPTN:
- nP := primitive.Uint32(sfd.inode.t.n)
+ nP := primitive.Uint32(rfd.inode.t.n)
_, err := nP.CopyOut(t, args[2].Pointer())
return 0, err
case linux.TIOCGWINSZ:
- return 0, sfd.inode.t.ld.windowSize(t, args)
+ return 0, rfd.inode.t.ld.windowSize(t, args)
case linux.TIOCSWINSZ:
- return 0, sfd.inode.t.ld.setWindowSize(t, args)
+ return 0, rfd.inode.t.ld.setWindowSize(t, args)
case linux.TIOCSCTTY:
// Make the given terminal the controlling terminal of the
// calling process.
- return 0, sfd.inode.t.setControllingTTY(ctx, args, false /* isMaster */)
+ return 0, rfd.inode.t.setControllingTTY(ctx, args, false /* isMaster */)
case linux.TIOCNOTTY:
// Release this process's controlling terminal.
- return 0, sfd.inode.t.releaseControllingTTY(ctx, args, false /* isMaster */)
+ return 0, rfd.inode.t.releaseControllingTTY(ctx, args, false /* isMaster */)
case linux.TIOCGPGRP:
// Get the foreground process group.
- return sfd.inode.t.foregroundProcessGroup(ctx, args, false /* isMaster */)
+ return rfd.inode.t.foregroundProcessGroup(ctx, args, false /* isMaster */)
case linux.TIOCSPGRP:
// Set the foreground process group.
- return sfd.inode.t.setForegroundProcessGroup(ctx, args, false /* isMaster */)
+ return rfd.inode.t.setForegroundProcessGroup(ctx, args, false /* isMaster */)
default:
maybeEmitUnimplementedEvent(ctx, cmd)
return 0, syserror.ENOTTY
@@ -182,24 +181,24 @@ func (sfd *replicaFileDescription) Ioctl(ctx context.Context, io usermem.IO, arg
}
// SetStat implements vfs.FileDescriptionImpl.SetStat.
-func (sfd *replicaFileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+func (rfd *replicaFileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
creds := auth.CredentialsFromContext(ctx)
- fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem()
- return sfd.inode.SetStat(ctx, fs, creds, opts)
+ fs := rfd.vfsfd.VirtualDentry().Mount().Filesystem()
+ return rfd.inode.SetStat(ctx, fs, creds, opts)
}
// Stat implements vfs.FileDescriptionImpl.Stat.
-func (sfd *replicaFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
- fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem()
- return sfd.inode.Stat(ctx, fs, opts)
+func (rfd *replicaFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+ fs := rfd.vfsfd.VirtualDentry().Mount().Filesystem()
+ return rfd.inode.Stat(ctx, fs, opts)
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (sfd *replicaFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return sfd.Locks().LockPOSIX(ctx, &sfd.vfsfd, uid, t, start, length, whence, block)
+func (rfd *replicaFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return rfd.Locks().LockPOSIX(ctx, &rfd.vfsfd, uid, t, start, length, whence, block)
}
// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (sfd *replicaFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return sfd.Locks().UnlockPOSIX(ctx, &sfd.vfsfd, uid, start, length, whence)
+func (rfd *replicaFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return rfd.Locks().UnlockPOSIX(ctx, &rfd.vfsfd, uid, start, length, whence)
}
diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
index a23094e54..6d1753080 100644
--- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
+++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
@@ -33,8 +33,10 @@ import (
const Name = "devtmpfs"
// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type FilesystemType struct {
- initOnce sync.Once
+ initOnce sync.Once `state:"nosave"` // FIXME(gvisor.dev/issue/1664): not yet supported.
initErr error
// fs is the tmpfs filesystem that backs all mounts of this FilesystemType.
diff --git a/pkg/sentry/fsimpl/eventfd/eventfd.go b/pkg/sentry/fsimpl/eventfd/eventfd.go
index bb0bf3a07..1c27ad700 100644
--- a/pkg/sentry/fsimpl/eventfd/eventfd.go
+++ b/pkg/sentry/fsimpl/eventfd/eventfd.go
@@ -33,6 +33,8 @@ import (
// EventFileDescription implements vfs.FileDescriptionImpl for file-based event
// notification (eventfd). Eventfds are usually internal to the Sentry but in
// certain situations they may be converted into a host-backed eventfd.
+//
+// +stateify savable
type EventFileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
diff --git a/pkg/sentry/fsimpl/ext/dentry.go b/pkg/sentry/fsimpl/ext/dentry.go
index 7a1b4219f..9bfed883a 100644
--- a/pkg/sentry/fsimpl/ext/dentry.go
+++ b/pkg/sentry/fsimpl/ext/dentry.go
@@ -20,6 +20,8 @@ import (
)
// dentry implements vfs.DentryImpl.
+//
+// +stateify savable
type dentry struct {
vfsd vfs.Dentry
diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go
index 0fc01668d..452450d82 100644
--- a/pkg/sentry/fsimpl/ext/directory.go
+++ b/pkg/sentry/fsimpl/ext/directory.go
@@ -28,6 +28,8 @@ import (
)
// directory represents a directory inode. It holds the childList in memory.
+//
+// +stateify savable
type directory struct {
inode inode
@@ -39,7 +41,7 @@ type directory struct {
// Lock Order (outermost locks must be taken first):
// directory.mu
// filesystem.mu
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
// childList is a list containing (1) child dirents and (2) fake dirents
// (with diskDirent == nil) that represent the iteration position of
@@ -120,6 +122,8 @@ func (i *inode) isDir() bool {
}
// dirent is the directory.childList node.
+//
+// +stateify savable
type dirent struct {
diskDirent disklayout.Dirent
@@ -129,6 +133,8 @@ type dirent struct {
// directoryFD represents a directory file description. It implements
// vfs.FileDescriptionImpl.
+//
+// +stateify savable
type directoryFD struct {
fileDescription
vfs.DirectoryFileDescriptionDefaultImpl
diff --git a/pkg/sentry/fsimpl/ext/ext.go b/pkg/sentry/fsimpl/ext/ext.go
index 08ffc2834..aca258d40 100644
--- a/pkg/sentry/fsimpl/ext/ext.go
+++ b/pkg/sentry/fsimpl/ext/ext.go
@@ -34,6 +34,8 @@ import (
const Name = "ext"
// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type FilesystemType struct{}
// Compiles only if FilesystemType implements vfs.FilesystemType.
diff --git a/pkg/sentry/fsimpl/ext/extent_file.go b/pkg/sentry/fsimpl/ext/extent_file.go
index c36225a7c..04917d762 100644
--- a/pkg/sentry/fsimpl/ext/extent_file.go
+++ b/pkg/sentry/fsimpl/ext/extent_file.go
@@ -24,6 +24,8 @@ import (
)
// extentFile is a type of regular file which uses extents to store file data.
+//
+// +stateify savable
type extentFile struct {
regFile regularFile
diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go
index a4a6d8c55..917f1873d 100644
--- a/pkg/sentry/fsimpl/ext/filesystem.go
+++ b/pkg/sentry/fsimpl/ext/filesystem.go
@@ -38,11 +38,13 @@ var (
)
// filesystem implements vfs.FilesystemImpl.
+//
+// +stateify savable
type filesystem struct {
vfsfs vfs.Filesystem
// mu serializes changes to the Dentry tree.
- mu sync.RWMutex
+ mu sync.RWMutex `state:"nosave"`
// dev represents the underlying fs device. It does not require protection
// because io.ReaderAt permits concurrent read calls to it. It translates to
diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go
index 30636cf66..9009ba3c7 100644
--- a/pkg/sentry/fsimpl/ext/inode.go
+++ b/pkg/sentry/fsimpl/ext/inode.go
@@ -37,6 +37,8 @@ import (
// |-- regular--
// |-- extent file
// |-- block map file
+//
+// +stateify savable
type inode struct {
// refs is a reference count. refs is accessed using atomic memory operations.
refs int64
diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go
index e73e740d6..4a5539b37 100644
--- a/pkg/sentry/fsimpl/ext/regular_file.go
+++ b/pkg/sentry/fsimpl/ext/regular_file.go
@@ -31,6 +31,8 @@ import (
// regularFile represents a regular file's inode. This too follows the
// inheritance pattern prevelant in the vfs layer described in
// pkg/sentry/vfs/README.md.
+//
+// +stateify savable
type regularFile struct {
inode inode
@@ -67,6 +69,8 @@ func (in *inode) isRegular() bool {
// directoryFD represents a directory file description. It implements
// vfs.FileDescriptionImpl.
+//
+// +stateify savable
type regularFileFD struct {
fileDescription
vfs.LockFD
@@ -75,7 +79,7 @@ type regularFileFD struct {
off int64
// offMu serializes operations that may mutate off.
- offMu sync.Mutex
+ offMu sync.Mutex `state:"nosave"`
}
// Release implements vfs.FileDescriptionImpl.Release.
diff --git a/pkg/sentry/fsimpl/ext/symlink.go b/pkg/sentry/fsimpl/ext/symlink.go
index f33592d59..5e2bcc837 100644
--- a/pkg/sentry/fsimpl/ext/symlink.go
+++ b/pkg/sentry/fsimpl/ext/symlink.go
@@ -23,6 +23,8 @@ import (
)
// symlink represents a symlink inode.
+//
+// +stateify savable
type symlink struct {
inode inode
target string // immutable
@@ -64,6 +66,8 @@ func (in *inode) isSymlink() bool {
// symlinkFD represents a symlink file description and implements
// vfs.FileDescriptionImpl. which may only be used if open options contains
// O_PATH. For this reason most of the functions return EBADF.
+//
+// +stateify savable
type symlinkFD struct {
fileDescription
vfs.NoLockFD
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index dbc5e1954..8ccda1264 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -39,15 +39,18 @@ const (
)
// connection is the struct by which the sentry communicates with the FUSE server daemon.
+//
// Lock order:
// - conn.fd.mu
// - conn.mu
// - conn.asyncMu
+//
+// +stateify savable
type connection struct {
fd *DeviceFD
// mu protects access to struct memebers.
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
// attributeVersion is the version of connection's attributes.
attributeVersion uint64
@@ -75,7 +78,7 @@ type connection struct {
initialized int32
// initializedChan is used to block requests before initialization.
- initializedChan chan struct{}
+ initializedChan chan struct{} `state:".(bool)"`
// connected (connection established) when a new FUSE file system is created.
// Set to false when:
@@ -113,7 +116,7 @@ type connection struct {
// i.e. `!request.noReply`
// asyncMu protects the async request fields.
- asyncMu sync.Mutex
+ asyncMu sync.Mutex `state:"nosave"`
// asyncNum is the number of async requests.
// Protected by asyncMu.
@@ -174,6 +177,22 @@ type connection struct {
noOpen bool
}
+func (conn *connection) saveInitializedChan() bool {
+ select {
+ case <-conn.initializedChan:
+ return true // Closed.
+ default:
+ return false // Not closed.
+ }
+}
+
+func (conn *connection) loadInitializedChan(closed bool) {
+ conn.initializedChan = make(chan struct{}, 1)
+ if closed {
+ close(conn.initializedChan)
+ }
+}
+
// newFUSEConnection creates a FUSE connection to fd.
func newFUSEConnection(_ context.Context, fd *vfs.FileDescription, opts *filesystemOptions) (*connection, error) {
// Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index f690ef5ad..1b86a4b4c 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -31,6 +31,8 @@ import (
const fuseDevMinor = 229
// fuseDevice implements vfs.Device for /dev/fuse.
+//
+// +stateify savable
type fuseDevice struct{}
// Open implements vfs.Device.Open.
@@ -49,6 +51,8 @@ func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, op
}
// DeviceFD implements vfs.FileDescriptionImpl for /dev/fuse.
+//
+// +stateify savable
type DeviceFD struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
@@ -79,7 +83,7 @@ type DeviceFD struct {
writeCursorFR *futureResponse
// mu protects all the queues, maps, buffers and cursors and nextOpID.
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
// waitQueue is used to notify interested parties when the device becomes
// readable or writable.
@@ -88,12 +92,20 @@ type DeviceFD struct {
// fullQueueCh is a channel used to synchronize the readers with the writers.
// Writers (inbound requests to the filesystem) block if there are too many
// unprocessed in-flight requests.
- fullQueueCh chan struct{}
+ fullQueueCh chan struct{} `state:".(int)"`
// fs is the FUSE filesystem that this FD is being used for.
fs *filesystem
}
+func (fd *DeviceFD) saveFullQueueCh() int {
+ return cap(fd.fullQueueCh)
+}
+
+func (fd *DeviceFD) loadFullQueueCh(capacity int) {
+ fd.fullQueueCh = make(chan struct{}, capacity)
+}
+
// Release implements vfs.FileDescriptionImpl.Release.
func (fd *DeviceFD) Release(ctx context.Context) {
if fd.fs != nil {
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 1d42a51f4..65786e42a 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -41,8 +41,11 @@ const Name = "fuse"
const maxActiveRequestsDefault = 10000
// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type FilesystemType struct{}
+// +stateify savable
type filesystemOptions struct {
// userID specifies the numeric uid of the mount owner.
// This option should not be specified by the filesystem owner.
@@ -73,6 +76,8 @@ type filesystemOptions struct {
}
// filesystem implements vfs.FilesystemImpl.
+//
+// +stateify savable
type filesystem struct {
kernfs.Filesystem
devMinor uint32
@@ -240,6 +245,8 @@ func (fs *filesystem) Release(ctx context.Context) {
}
// inode implements kernfs.Inode.
+//
+// +stateify savable
type inode struct {
inodeRefs
kernfs.InodeAttrs
@@ -419,7 +426,7 @@ func (*inode) Valid(ctx context.Context) bool {
}
// NewFile implements kernfs.Inode.NewFile.
-func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*vfs.Dentry, error) {
+func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*kernfs.Dentry, error) {
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("fusefs.Inode.NewFile: couldn't get kernel task from context", i.nodeID)
@@ -433,15 +440,11 @@ func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions)
},
Name: name,
}
- d, err := i.newEntry(ctx, name, linux.S_IFREG, linux.FUSE_CREATE, &in)
- if err != nil {
- return nil, err
- }
- return d.VFSDentry(), nil
+ return i.newEntry(ctx, name, linux.S_IFREG, linux.FUSE_CREATE, &in)
}
// NewNode implements kernfs.Inode.NewNode.
-func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*vfs.Dentry, error) {
+func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*kernfs.Dentry, error) {
in := linux.FUSEMknodIn{
MknodMeta: linux.FUSEMknodMeta{
Mode: uint32(opts.Mode),
@@ -450,28 +453,20 @@ func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions)
},
Name: name,
}
- d, err := i.newEntry(ctx, name, opts.Mode.FileType(), linux.FUSE_MKNOD, &in)
- if err != nil {
- return nil, err
- }
- return d.VFSDentry(), nil
+ return i.newEntry(ctx, name, opts.Mode.FileType(), linux.FUSE_MKNOD, &in)
}
// NewSymlink implements kernfs.Inode.NewSymlink.
-func (i *inode) NewSymlink(ctx context.Context, name, target string) (*vfs.Dentry, error) {
+func (i *inode) NewSymlink(ctx context.Context, name, target string) (*kernfs.Dentry, error) {
in := linux.FUSESymLinkIn{
Name: name,
Target: target,
}
- d, err := i.newEntry(ctx, name, linux.S_IFLNK, linux.FUSE_SYMLINK, &in)
- if err != nil {
- return nil, err
- }
- return d.VFSDentry(), nil
+ return i.newEntry(ctx, name, linux.S_IFLNK, linux.FUSE_SYMLINK, &in)
}
// Unlink implements kernfs.Inode.Unlink.
-func (i *inode) Unlink(ctx context.Context, name string, child *vfs.Dentry) error {
+func (i *inode) Unlink(ctx context.Context, name string, child *kernfs.Dentry) error {
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID)
@@ -494,7 +489,7 @@ func (i *inode) Unlink(ctx context.Context, name string, child *vfs.Dentry) erro
}
// NewDir implements kernfs.Inode.NewDir.
-func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error) {
+func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*kernfs.Dentry, error) {
in := linux.FUSEMkdirIn{
MkdirMeta: linux.FUSEMkdirMeta{
Mode: uint32(opts.Mode),
@@ -502,15 +497,11 @@ func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions)
},
Name: name,
}
- d, err := i.newEntry(ctx, name, linux.S_IFDIR, linux.FUSE_MKDIR, &in)
- if err != nil {
- return nil, err
- }
- return d.VFSDentry(), nil
+ return i.newEntry(ctx, name, linux.S_IFDIR, linux.FUSE_MKDIR, &in)
}
// RmDir implements kernfs.Inode.RmDir.
-func (i *inode) RmDir(ctx context.Context, name string, child *vfs.Dentry) error {
+func (i *inode) RmDir(ctx context.Context, name string, child *kernfs.Dentry) error {
fusefs := i.fs
task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx)
@@ -528,12 +519,7 @@ func (i *inode) RmDir(ctx context.Context, name string, child *vfs.Dentry) error
return err
}
- // TODO(Before merging): When creating new nodes, should we add nodes to the ordered children?
- // If so we'll probably need to call this. We will also need to add them with the writable flag when
- // appropriate.
- // return i.OrderedChildren.RmDir(ctx, name, child)
-
- return nil
+ return i.dentry.RemoveChildLocked(name, child)
}
// newEntry calls FUSE server for entry creation and allocates corresponding entry according to response.
@@ -563,11 +549,6 @@ func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMo
return nil, syserror.EIO
}
child := i.fs.newInode(out.NodeID, out.Attr)
- if opcode == linux.FUSE_LOOKUP {
- i.dentry.InsertChildLocked(name, child)
- } else {
- i.dentry.InsertChild(name, child)
- }
return child, nil
}
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index 91d2ae199..18c884b59 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -117,11 +117,12 @@ func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) {
d.syntheticChildren++
}
+// +stateify savable
type directoryFD struct {
fileDescription
vfs.DirectoryFileDescriptionDefaultImpl
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
off int64
dirents []vfs.Dirent
}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 97b9165cc..94d96261b 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -1416,11 +1416,11 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
return err
}
- if err := d.setStat(ctx, rp.Credentials(), &opts, rp.Mount()); err != nil {
- fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
+ err = d.setStat(ctx, rp.Credentials(), &opts, rp.Mount())
+ fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
+ if err != nil {
return err
}
- fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
@@ -1556,11 +1556,11 @@ func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
return err
}
- if err := d.setXattr(ctx, rp.Credentials(), &opts); err != nil {
- fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
+ err = d.setXattr(ctx, rp.Credentials(), &opts)
+ fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
+ if err != nil {
return err
}
- fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
return nil
@@ -1575,11 +1575,11 @@ func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath,
fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
return err
}
- if err := d.removeXattr(ctx, rp.Credentials(), name); err != nil {
- fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
+ err = d.removeXattr(ctx, rp.Credentials(), name)
+ fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
+ if err != nil {
return err
}
- fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
return nil
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index aaad9c0d9..8608471f8 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -62,9 +62,13 @@ import (
const Name = "9p"
// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type FilesystemType struct{}
// filesystem implements vfs.FilesystemImpl.
+//
+// +stateify savable
type filesystem struct {
vfsfs vfs.Filesystem
@@ -77,7 +81,7 @@ type filesystem struct {
iopts InternalFilesystemOptions
// client is the client used by this filesystem. client is immutable.
- client *p9.Client
+ client *p9.Client `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
// clock is a realtime clock used to set timestamps in file operations.
clock ktime.Clock
@@ -95,7 +99,7 @@ type filesystem struct {
// reference count (such that it is usable as vfs.ResolvingPath.Start() or
// is reachable from its children), or if it is a child dentry (such that
// it is reachable from its parent).
- renameMu sync.RWMutex
+ renameMu sync.RWMutex `state:"nosave"`
// cachedDentries contains all dentries with 0 references. (Due to race
// conditions, it may also contain dentries with non-zero references.)
@@ -107,7 +111,7 @@ type filesystem struct {
// syncableDentries contains all dentries in this filesystem for which
// !dentry.file.isNil(). specialFileFDs contains all open specialFileFDs.
// These fields are protected by syncMu.
- syncMu sync.Mutex
+ syncMu sync.Mutex `state:"nosave"`
syncableDentries map[*dentry]struct{}
specialFileFDs map[*specialFileFD]struct{}
@@ -120,6 +124,8 @@ type filesystem struct {
// dentries, it comes from QID.Path from the 9P server. Synthetic dentries
// have have their inodeNumber generated sequentially, with the MSB reserved to
// prevent conflicts with regular dentries.
+//
+// +stateify savable
type inodeNumber uint64
// Reserve MSB for synthetic mounts.
@@ -132,6 +138,7 @@ func inoFromPath(path uint64) inodeNumber {
return inodeNumber(path &^ syntheticInoMask)
}
+// +stateify savable
type filesystemOptions struct {
// "Standard" 9P options.
fd int
@@ -177,6 +184,8 @@ type filesystemOptions struct {
// InteropMode controls the client's interaction with other remote filesystem
// users.
+//
+// +stateify savable
type InteropMode uint32
const (
@@ -235,6 +244,8 @@ const (
// InternalFilesystemOptions may be passed as
// vfs.GetFilesystemOptions.InternalData to FilesystemType.GetFilesystem.
+//
+// +stateify savable
type InternalFilesystemOptions struct {
// If LeakConnection is true, do not close the connection to the server
// when the Filesystem is released. This is necessary for deployments in
@@ -534,6 +545,8 @@ func (fs *filesystem) Release(ctx context.Context) {
}
// dentry implements vfs.DentryImpl.
+//
+// +stateify savable
type dentry struct {
vfsd vfs.Dentry
@@ -563,7 +576,7 @@ type dentry struct {
// If file.isNil(), this dentry represents a synthetic file, i.e. a file
// that does not exist on the remote filesystem. As of this writing, the
// only files that can be synthetic are sockets, pipes, and directories.
- file p9file
+ file p9file `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
// If deleted is non-zero, the file represented by this dentry has been
// deleted. deleted is accessed using atomic memory operations.
@@ -575,7 +588,7 @@ type dentry struct {
cached bool
dentryEntry
- dirMu sync.Mutex
+ dirMu sync.Mutex `state:"nosave"`
// If this dentry represents a directory, children contains:
//
@@ -607,7 +620,7 @@ type dentry struct {
// To mutate:
// - Lock metadataMu and use atomic operations to update because we might
// have atomic readers that don't hold the lock.
- metadataMu sync.Mutex
+ metadataMu sync.Mutex `state:"nosave"`
ino inodeNumber // immutable
mode uint32 // type is immutable, perms are mutable
uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
@@ -638,7 +651,7 @@ type dentry struct {
// other metadata fields.
nlink uint32
- mapsMu sync.Mutex
+ mapsMu sync.Mutex `state:"nosave"`
// If this dentry represents a regular file, mappings tracks mappings of
// the file into memmap.MappingSpaces. mappings is protected by mapsMu.
@@ -662,12 +675,12 @@ type dentry struct {
// either p9.File transitions from closed (isNil() == true) to open
// (isNil() == false), it may be mutated with handleMu locked, but cannot
// be closed until the dentry is destroyed.
- handleMu sync.RWMutex
- readFile p9file
- writeFile p9file
+ handleMu sync.RWMutex `state:"nosave"`
+ readFile p9file `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
+ writeFile p9file `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
hostFD int32
- dataMu sync.RWMutex
+ dataMu sync.RWMutex `state:"nosave"`
// If this dentry represents a regular file that is client-cached, cache
// maps offsets into the cached file to offsets into
@@ -1627,12 +1640,14 @@ func (d *dentry) decLinks() {
// fileDescription is embedded by gofer implementations of
// vfs.FileDescriptionImpl.
+//
+// +stateify savable
type fileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
vfs.LockFD
- lockLogging sync.Once
+ lockLogging sync.Once `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
}
func (fd *fileDescription) filesystem() *filesystem {
diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go
index 104157512..a9ebe1206 100644
--- a/pkg/sentry/fsimpl/gofer/handle.go
+++ b/pkg/sentry/fsimpl/gofer/handle.go
@@ -25,6 +25,8 @@ import (
// handle represents a remote "open file descriptor", consisting of an opened
// fid (p9.File) and optionally a host file descriptor.
+//
+// These are explicitly not savable.
type handle struct {
file p9file
fd int32 // -1 if unavailable
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index 24f03ee94..eeaf6e444 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -39,11 +39,12 @@ func (d *dentry) isRegularFile() bool {
return d.fileType() == linux.S_IFREG
}
+// +stateify savable
type regularFileFD struct {
fileDescription
// off is the file offset. off is protected by mu.
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
off int64
}
@@ -898,6 +899,8 @@ func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) {
// dentryPlatformFile is only used when a host FD representing the remote file
// is available (i.e. dentry.hostFD >= 0), and that FD is used for application
// memory mappings (i.e. !filesystem.opts.forcePageCache).
+//
+// +stateify savable
type dentryPlatformFile struct {
*dentry
@@ -910,7 +913,7 @@ type dentryPlatformFile struct {
hostFileMapper fsutil.HostFileMapper
// hostFileMapperInitOnce is used to lazily initialize hostFileMapper.
- hostFileMapperInitOnce sync.Once
+ hostFileMapperInitOnce sync.Once `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
}
// IncRef implements memmap.File.IncRef.
diff --git a/pkg/sentry/fsimpl/gofer/socket.go b/pkg/sentry/fsimpl/gofer/socket.go
index 85d2bee72..326b940a7 100644
--- a/pkg/sentry/fsimpl/gofer/socket.go
+++ b/pkg/sentry/fsimpl/gofer/socket.go
@@ -36,12 +36,14 @@ func (d *dentry) isSocket() bool {
// An endpoint's lifetime is the time between when filesystem.BoundEndpointAt()
// is called and either BoundEndpoint.BidirectionalConnect or
// BoundEndpoint.UnidirectionalConnect is called.
+//
+// +stateify savable
type endpoint struct {
// dentry is the filesystem dentry which produced this endpoint.
dentry *dentry
// file is the p9 file that contains a single unopened fid.
- file p9.File
+ file p9.File `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
// path is the sentry path where this endpoint is bound.
path string
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index 576c57491..71581736c 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -34,11 +34,13 @@ import (
// special files, and (when filesystemOptions.regularFilesUseSpecialFileFD is
// in effect) regular files. specialFileFD differs from regularFileFD by using
// per-FD handles instead of shared per-dentry handles, and never buffering I/O.
+//
+// +stateify savable
type specialFileFD struct {
fileDescription
// handle is used for file I/O. handle is immutable.
- handle handle
+ handle handle `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
// isRegularFile is true if this FD represents a regular file which is only
// possible when filesystemOptions.regularFilesUseSpecialFileFD is in
@@ -56,7 +58,7 @@ type specialFileFD struct {
queue waiter.Queue
// If seekable is true, off is the file offset. off is protected by mu.
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
off int64
}
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index c0cef3453..ffe4ddb32 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -137,6 +137,8 @@ func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs
}
// filesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type filesystemType struct{}
// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
@@ -166,6 +168,8 @@ func NewFilesystem(vfsObj *vfs.VirtualFilesystem) (*vfs.Filesystem, error) {
}
// filesystem implements vfs.FilesystemImpl.
+//
+// +stateify savable
type filesystem struct {
kernfs.Filesystem
@@ -185,6 +189,8 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe
}
// inode implements kernfs.Inode.
+//
+// +stateify savable
type inode struct {
kernfs.InodeNoStatFS
kernfs.InodeNotDirectory
@@ -233,7 +239,7 @@ type inode struct {
canMap bool
// mapsMu protects mappings.
- mapsMu sync.Mutex
+ mapsMu sync.Mutex `state:"nosave"`
// If canMap is true, mappings tracks mappings of hostFD into
// memmap.MappingSpaces.
@@ -511,6 +517,8 @@ func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, flag
}
// fileDescription is embedded by host fd implementations of FileDescriptionImpl.
+//
+// +stateify savable
type fileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
@@ -525,7 +533,7 @@ type fileDescription struct {
inode *inode
// offsetMu protects offset.
- offsetMu sync.Mutex
+ offsetMu sync.Mutex `state:"nosave"`
// offset specifies the current file offset. It is only meaningful when
// inode.seekable is true.
diff --git a/pkg/sentry/fsimpl/host/mmap.go b/pkg/sentry/fsimpl/host/mmap.go
index 65d3af38c..b51a17bed 100644
--- a/pkg/sentry/fsimpl/host/mmap.go
+++ b/pkg/sentry/fsimpl/host/mmap.go
@@ -27,11 +27,13 @@ import (
// cannot implement both kernfs.Inode.IncRef and memmap.File.IncRef.
//
// inodePlatformFile should only be used if inode.canMap is true.
+//
+// +stateify savable
type inodePlatformFile struct {
*inode
// fdRefsMu protects fdRefs.
- fdRefsMu sync.Mutex
+ fdRefsMu sync.Mutex `state:"nosave"`
// fdRefs counts references on memmap.File offsets. It is used solely for
// memory accounting.
@@ -41,7 +43,7 @@ type inodePlatformFile struct {
fileMapper fsutil.HostFileMapper
// fileMapperInitOnce is used to lazily initialize fileMapper.
- fileMapperInitOnce sync.Once
+ fileMapperInitOnce sync.Once `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
}
// IncRef implements memmap.File.IncRef.
diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go
index e02b9b8f6..f5c596fec 100644
--- a/pkg/sentry/fsimpl/host/tty.go
+++ b/pkg/sentry/fsimpl/host/tty.go
@@ -30,6 +30,8 @@ import (
// TTYFileDescription implements vfs.FileDescriptionImpl for a host file
// descriptor that wraps a TTY FD.
+//
+// +stateify savable
type TTYFileDescription struct {
fileDescription
diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
index 7d040481f..b929118b1 100644
--- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
+++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
@@ -58,7 +58,7 @@ func (f *DynamicBytesFile) Init(creds *auth.Credentials, devMajor, devMinor uint
// Open implements Inode.Open.
func (f *DynamicBytesFile) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd := &DynamicBytesFD{}
- if err := fd.Init(rp.Mount(), d.VFSDentry(), f.data, &f.locks, opts.Flags); err != nil {
+ if err := fd.Init(rp.Mount(), d, f.data, &f.locks, opts.Flags); err != nil {
return nil, err
}
return &fd.vfsfd, nil
@@ -87,12 +87,12 @@ type DynamicBytesFD struct {
}
// Init initializes a DynamicBytesFD.
-func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, locks *vfs.FileLocks, flags uint32) error {
+func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *Dentry, data vfs.DynamicBytesSource, locks *vfs.FileLocks, flags uint32) error {
fd.LockFD.Init(locks)
- if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil {
+ if err := fd.vfsfd.Init(fd, flags, m, d.VFSDentry(), &vfs.FileDescriptionOptions{}); err != nil {
return err
}
- fd.inode = d.Impl().(*Dentry).inode
+ fd.inode = d.inode
fd.SetDataSource(data)
return nil
}
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index 2b4294228..0a4cd4057 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -29,6 +29,8 @@ import (
)
// SeekEndConfig describes the SEEK_END behaviour for FDs.
+//
+// +stateify savable
type SeekEndConfig int
// Constants related to SEEK_END behaviour for FDs.
@@ -41,6 +43,8 @@ const (
)
// GenericDirectoryFDOptions contains configuration for a GenericDirectoryFD.
+//
+// +stateify savable
type GenericDirectoryFDOptions struct {
SeekEnd SeekEndConfig
}
@@ -56,6 +60,8 @@ type GenericDirectoryFDOptions struct {
// Must be initialize with Init before first use.
//
// Lock ordering: mu => children.mu.
+//
+// +stateify savable
type GenericDirectoryFD struct {
vfs.FileDescriptionDefaultImpl
vfs.DirectoryFileDescriptionDefaultImpl
@@ -68,7 +74,7 @@ type GenericDirectoryFD struct {
children *OrderedChildren
// mu protects the fields below.
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
// off is the current directory offset. Protected by "mu".
off int64
@@ -195,8 +201,7 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent
// these.
childIdx := fd.off - 2
for it := fd.children.nthLocked(childIdx); it != nil; it = it.Next() {
- inode := it.Dentry.Impl().(*Dentry).inode
- stat, err := inode.Stat(ctx, fd.filesystem(), opts)
+ stat, err := it.Dentry.inode.Stat(ctx, fd.filesystem(), opts)
if err != nil {
return err
}
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index daad1e04a..5cc1c4281 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -37,8 +37,7 @@ import (
// * !rp.Done().
//
// Postcondition: Caller must call fs.processDeferredDecRefs*.
-func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, mayFollowSymlinks bool) (*vfs.Dentry, error) {
- d := vfsd.Impl().(*Dentry)
+func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, mayFollowSymlinks bool) (*Dentry, error) {
if !d.isDir() {
return nil, syserror.ENOTDIR
}
@@ -55,20 +54,20 @@ afterSymlink:
// calls d_revalidate(), but walk_component() => handle_dots() does not.
if name == "." {
rp.Advance()
- return vfsd, nil
+ return d, nil
}
if name == ".." {
- if isRoot, err := rp.CheckRoot(ctx, vfsd); err != nil {
+ if isRoot, err := rp.CheckRoot(ctx, d.VFSDentry()); err != nil {
return nil, err
} else if isRoot || d.parent == nil {
rp.Advance()
- return vfsd, nil
+ return d, nil
}
- if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
+ if err := rp.CheckMount(ctx, d.parent.VFSDentry()); err != nil {
return nil, err
}
rp.Advance()
- return &d.parent.vfsd, nil
+ return d.parent, nil
}
if len(name) > linux.NAME_MAX {
return nil, syserror.ENAMETOOLONG
@@ -79,7 +78,7 @@ afterSymlink:
if err != nil {
return nil, err
}
- if err := rp.CheckMount(ctx, &next.vfsd); err != nil {
+ if err := rp.CheckMount(ctx, next.VFSDentry()); err != nil {
return nil, err
}
// Resolve any symlink at current path component.
@@ -102,7 +101,7 @@ afterSymlink:
goto afterSymlink
}
rp.Advance()
- return &next.vfsd, nil
+ return next, nil
}
// revalidateChildLocked must be called after a call to parent.vfsd.Child(name)
@@ -122,7 +121,7 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir
if !child.inode.Valid(ctx) {
delete(parent.children, name)
vfsObj.InvalidateDentry(ctx, &child.vfsd)
- fs.deferDecRef(&child.vfsd) // Reference from Lookup.
+ fs.deferDecRef(child) // Reference from Lookup.
child = nil
}
}
@@ -133,7 +132,8 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir
if err != nil {
return nil, err
}
- // Reference on childVFSD dropped by a corresponding Valid.
+ // Reference on c (provided by Lookup) will be dropped when the dentry
+ // fails validation.
parent.InsertChildLocked(name, c)
child = c
}
@@ -149,15 +149,14 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir
//
// Postconditions: Caller must call fs.processDeferredDecRefs*.
func (fs *Filesystem) walkExistingLocked(ctx context.Context, rp *vfs.ResolvingPath) (*Dentry, error) {
- vfsd := rp.Start()
+ d := rp.Start().Impl().(*Dentry)
for !rp.Done() {
var err error
- vfsd, err = fs.stepExistingLocked(ctx, rp, vfsd, true /* mayFollowSymlinks */)
+ d, err = fs.stepExistingLocked(ctx, rp, d, true /* mayFollowSymlinks */)
if err != nil {
return nil, err
}
}
- d := vfsd.Impl().(*Dentry)
if rp.MustBeDir() && !d.isDir() {
return nil, syserror.ENOTDIR
}
@@ -176,20 +175,19 @@ func (fs *Filesystem) walkExistingLocked(ctx context.Context, rp *vfs.ResolvingP
// * !rp.Done().
//
// Postconditions: Caller must call fs.processDeferredDecRefs*.
-func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, Inode, error) {
- vfsd := rp.Start()
+func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath) (*Dentry, error) {
+ d := rp.Start().Impl().(*Dentry)
for !rp.Final() {
var err error
- vfsd, err = fs.stepExistingLocked(ctx, rp, vfsd, true /* mayFollowSymlinks */)
+ d, err = fs.stepExistingLocked(ctx, rp, d, true /* mayFollowSymlinks */)
if err != nil {
- return nil, nil, err
+ return nil, err
}
}
- d := vfsd.Impl().(*Dentry)
if !d.isDir() {
- return nil, nil, syserror.ENOTDIR
+ return nil, syserror.ENOTDIR
}
- return vfsd, d.inode, nil
+ return d, nil
}
// checkCreateLocked checks that a file named rp.Component() may be created in
@@ -197,10 +195,9 @@ func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving
//
// Preconditions:
// * Filesystem.mu must be locked for at least reading.
-// * parentInode == parentVFSD.Impl().(*Dentry).Inode.
// * isDir(parentInode) == true.
-func checkCreateLocked(ctx context.Context, rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode Inode) (string, error) {
- if err := parentInode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
+func checkCreateLocked(ctx context.Context, rp *vfs.ResolvingPath, parent *Dentry) (string, error) {
+ if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
return "", err
}
pc := rp.Component()
@@ -210,11 +207,10 @@ func checkCreateLocked(ctx context.Context, rp *vfs.ResolvingPath, parentVFSD *v
if len(pc) > linux.NAME_MAX {
return "", syserror.ENAMETOOLONG
}
- // FIXME(gvisor.dev/issue/1193): Data race due to not holding dirMu.
- if _, ok := parentVFSD.Impl().(*Dentry).children[pc]; ok {
+ if _, ok := parent.children[pc]; ok {
return "", syserror.EEXIST
}
- if parentVFSD.IsDead() {
+ if parent.VFSDentry().IsDead() {
return "", syserror.ENOENT
}
return pc, nil
@@ -288,12 +284,12 @@ func (fs *Filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa
fs.mu.RLock()
defer fs.processDeferredDecRefs(ctx)
defer fs.mu.RUnlock()
- vfsd, _, err := fs.walkParentDirLocked(ctx, rp)
+ d, err := fs.walkParentDirLocked(ctx, rp)
if err != nil {
return nil, err
}
- vfsd.IncRef() // Ownership transferred to caller.
- return vfsd, nil
+ d.IncRef() // Ownership transferred to caller.
+ return d.VFSDentry(), nil
}
// LinkAt implements vfs.FilesystemImpl.LinkAt.
@@ -303,12 +299,15 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
}
fs.mu.Lock()
defer fs.mu.Unlock()
- parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp)
+ parent, err := fs.walkParentDirLocked(ctx, rp)
fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
- pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode)
+
+ parent.dirMu.Lock()
+ defer parent.dirMu.Unlock()
+ pc, err := checkCreateLocked(ctx, rp, parent)
if err != nil {
return err
}
@@ -325,11 +324,11 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return syserror.EPERM
}
- childVFSD, err := parentInode.NewLink(ctx, pc, d.inode)
+ child, err := parent.inode.NewLink(ctx, pc, d.inode)
if err != nil {
return err
}
- parentVFSD.Impl().(*Dentry).InsertChild(pc, childVFSD.Impl().(*Dentry))
+ parent.InsertChildLocked(pc, child)
return nil
}
@@ -340,12 +339,15 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
}
fs.mu.Lock()
defer fs.mu.Unlock()
- parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp)
+ parent, err := fs.walkParentDirLocked(ctx, rp)
fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
- pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode)
+
+ parent.dirMu.Lock()
+ defer parent.dirMu.Unlock()
+ pc, err := checkCreateLocked(ctx, rp, parent)
if err != nil {
return err
}
@@ -353,14 +355,14 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return err
}
defer rp.Mount().EndWrite()
- childVFSD, err := parentInode.NewDir(ctx, pc, opts)
+ child, err := parent.inode.NewDir(ctx, pc, opts)
if err != nil {
if !opts.ForSyntheticMountpoint || err == syserror.EEXIST {
return err
}
- childVFSD = newSyntheticDirectory(rp.Credentials(), opts.Mode)
+ child = newSyntheticDirectory(rp.Credentials(), opts.Mode)
}
- parentVFSD.Impl().(*Dentry).InsertChild(pc, childVFSD.Impl().(*Dentry))
+ parent.InsertChildLocked(pc, child)
return nil
}
@@ -371,12 +373,15 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
}
fs.mu.Lock()
defer fs.mu.Unlock()
- parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp)
+ parent, err := fs.walkParentDirLocked(ctx, rp)
fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
- pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode)
+
+ parent.dirMu.Lock()
+ defer parent.dirMu.Unlock()
+ pc, err := checkCreateLocked(ctx, rp, parent)
if err != nil {
return err
}
@@ -384,11 +389,11 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return err
}
defer rp.Mount().EndWrite()
- newVFSD, err := parentInode.NewNode(ctx, pc, opts)
+ newD, err := parent.inode.NewNode(ctx, pc, opts)
if err != nil {
return err
}
- parentVFSD.Impl().(*Dentry).InsertChild(pc, newVFSD.Impl().(*Dentry))
+ parent.InsertChildLocked(pc, newD)
return nil
}
@@ -450,13 +455,13 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
return d.inode.Open(ctx, rp, d, opts)
}
afterTrailingSymlink:
- parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp)
+ parent, err := fs.walkParentDirLocked(ctx, rp)
fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return nil, err
}
// Check for search permission in the parent directory.
- if err := parentInode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
+ if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
}
// Reject attempts to open directories with O_CREAT.
@@ -471,10 +476,10 @@ afterTrailingSymlink:
return nil, syserror.ENAMETOOLONG
}
// Determine whether or not we need to create a file.
- childVFSD, err := fs.stepExistingLocked(ctx, rp, parentVFSD, false /* mayFollowSymlinks */)
+ child, err := fs.stepExistingLocked(ctx, rp, parent, false /* mayFollowSymlinks */)
if err == syserror.ENOENT {
// Already checked for searchability above; now check for writability.
- if err := parentInode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil {
+ if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil {
return nil, err
}
if err := rp.Mount().CheckBeginWrite(); err != nil {
@@ -482,12 +487,14 @@ afterTrailingSymlink:
}
defer rp.Mount().EndWrite()
// Create and open the child.
- childVFSD, err = parentInode.NewFile(ctx, pc, opts)
+ child, err := parent.inode.NewFile(ctx, pc, opts)
if err != nil {
return nil, err
}
- child := childVFSD.Impl().(*Dentry)
- parentVFSD.Impl().(*Dentry).InsertChild(pc, child)
+ // FIXME(gvisor.dev/issue/1193): Race between checking existence with
+ // fs.stepExistingLocked and parent.InsertChild. If possible, we should hold
+ // dirMu from one to the other.
+ parent.InsertChild(pc, child)
child.inode.IncRef()
defer child.inode.DecRef(ctx)
unlock()
@@ -500,7 +507,6 @@ afterTrailingSymlink:
if mustCreate {
return nil, syserror.EEXIST
}
- child := childVFSD.Impl().(*Dentry)
if rp.ShouldFollowSymlink() && child.isSymlink() {
targetVD, targetPathname, err := child.inode.Getlink(ctx, rp.Mount())
if err != nil {
@@ -559,11 +565,10 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
// Resolve the destination directory first to verify that it's on this
// Mount.
- dstDirVFSD, dstDirInode, err := fs.walkParentDirLocked(ctx, rp)
+ dstDir, err := fs.walkParentDirLocked(ctx, rp)
if err != nil {
return err
}
- dstDir := dstDirVFSD.Impl().(*Dentry)
mnt := rp.Mount()
if mnt != oldParentVD.Mount() {
return syserror.EXDEV
@@ -589,7 +594,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
// Can we create the dst dentry?
var dst *Dentry
- pc, err := checkCreateLocked(ctx, rp, dstDirVFSD, dstDirInode)
+ pc, err := checkCreateLocked(ctx, rp, dstDir)
switch err {
case nil:
// Ok, continue with rename as replacement.
@@ -600,14 +605,14 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
dst = dstDir.children[pc]
if dst == nil {
- panic(fmt.Sprintf("Child %q for parent Dentry %+v disappeared inside atomic section?", pc, dstDirVFSD))
+ panic(fmt.Sprintf("Child %q for parent Dentry %+v disappeared inside atomic section?", pc, dstDir))
}
default:
return err
}
var dstVFSD *vfs.Dentry
if dst != nil {
- dstVFSD = &dst.vfsd
+ dstVFSD = dst.VFSDentry()
}
mntns := vfs.MountNamespaceFromContext(ctx)
@@ -627,14 +632,14 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if err := virtfs.PrepareRenameDentry(mntns, srcVFSD, dstVFSD); err != nil {
return err
}
- replaced, err := srcDir.inode.Rename(ctx, src.name, pc, srcVFSD, dstDirVFSD)
+ replaced, err := srcDir.inode.Rename(ctx, src.name, pc, src, dstDir)
if err != nil {
virtfs.AbortRenameDentry(srcVFSD, dstVFSD)
return err
}
delete(srcDir.children, src.name)
if srcDir != dstDir {
- fs.deferDecRef(srcDirVFSD)
+ fs.deferDecRef(srcDir)
dstDir.IncRef()
}
src.parent = dstDir
@@ -643,7 +648,11 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
dstDir.children = make(map[string]*Dentry)
}
dstDir.children[pc] = src
- virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaced)
+ var replaceVFSD *vfs.Dentry
+ if replaced != nil {
+ replaceVFSD = replaced.VFSDentry()
+ }
+ virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD)
return nil
}
@@ -682,7 +691,7 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
- if err := parentDentry.inode.RmDir(ctx, d.name, d.VFSDentry()); err != nil {
+ if err := parentDentry.inode.RmDir(ctx, d.name, d); err != nil {
virtfs.AbortDeleteDentry(vfsd)
return err
}
@@ -736,12 +745,15 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
}
fs.mu.Lock()
defer fs.mu.Unlock()
- parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp)
+ parent, err := fs.walkParentDirLocked(ctx, rp)
fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
- pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode)
+ parent.dirMu.Lock()
+ defer parent.dirMu.Unlock()
+
+ pc, err := checkCreateLocked(ctx, rp, parent)
if err != nil {
return err
}
@@ -749,11 +761,11 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
return err
}
defer rp.Mount().EndWrite()
- childVFSD, err := parentInode.NewSymlink(ctx, pc, target)
+ child, err := parent.inode.NewSymlink(ctx, pc, target)
if err != nil {
return err
}
- parentVFSD.Impl().(*Dentry).InsertChild(pc, childVFSD.Impl().(*Dentry))
+ parent.InsertChildLocked(pc, child)
return nil
}
@@ -787,7 +799,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil {
return err
}
- if err := parentDentry.inode.Unlink(ctx, d.name, d.VFSDentry()); err != nil {
+ if err := parentDentry.inode.Unlink(ctx, d.name, d); err != nil {
virtfs.AbortDeleteDentry(vfsd)
return err
}
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 5cc196980..49210e748 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -31,6 +31,8 @@ import (
// count for inodes, performing no extra actions when references are obtained or
// released. This is suitable for simple file inodes that don't reference any
// resources.
+//
+// +stateify savable
type InodeNoopRefCount struct {
}
@@ -50,30 +52,32 @@ func (InodeNoopRefCount) TryIncRef() bool {
// InodeDirectoryNoNewChildren partially implements the Inode interface.
// InodeDirectoryNoNewChildren represents a directory inode which does not
// support creation of new children.
+//
+// +stateify savable
type InodeDirectoryNoNewChildren struct{}
// NewFile implements Inode.NewFile.
-func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (*vfs.Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (*Dentry, error) {
return nil, syserror.EPERM
}
// NewDir implements Inode.NewDir.
-func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (*vfs.Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (*Dentry, error) {
return nil, syserror.EPERM
}
// NewLink implements Inode.NewLink.
-func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (*vfs.Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (*Dentry, error) {
return nil, syserror.EPERM
}
// NewSymlink implements Inode.NewSymlink.
-func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (*vfs.Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (*Dentry, error) {
return nil, syserror.EPERM
}
// NewNode implements Inode.NewNode.
-func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (*vfs.Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (*Dentry, error) {
return nil, syserror.EPERM
}
@@ -81,6 +85,8 @@ func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOpt
// inodeDirectory and inodeDynamicDirectory sub interfaces. Inodes that do not
// represent directories can embed this to provide no-op implementations for
// directory-related functions.
+//
+// +stateify savable
type InodeNotDirectory struct {
}
@@ -90,42 +96,42 @@ func (InodeNotDirectory) HasChildren() bool {
}
// NewFile implements Inode.NewFile.
-func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (*vfs.Dentry, error) {
+func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (*Dentry, error) {
panic("NewFile called on non-directory inode")
}
// NewDir implements Inode.NewDir.
-func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (*vfs.Dentry, error) {
+func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (*Dentry, error) {
panic("NewDir called on non-directory inode")
}
// NewLink implements Inode.NewLinkink.
-func (InodeNotDirectory) NewLink(context.Context, string, Inode) (*vfs.Dentry, error) {
+func (InodeNotDirectory) NewLink(context.Context, string, Inode) (*Dentry, error) {
panic("NewLink called on non-directory inode")
}
// NewSymlink implements Inode.NewSymlink.
-func (InodeNotDirectory) NewSymlink(context.Context, string, string) (*vfs.Dentry, error) {
+func (InodeNotDirectory) NewSymlink(context.Context, string, string) (*Dentry, error) {
panic("NewSymlink called on non-directory inode")
}
// NewNode implements Inode.NewNode.
-func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (*vfs.Dentry, error) {
+func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (*Dentry, error) {
panic("NewNode called on non-directory inode")
}
// Unlink implements Inode.Unlink.
-func (InodeNotDirectory) Unlink(context.Context, string, *vfs.Dentry) error {
+func (InodeNotDirectory) Unlink(context.Context, string, *Dentry) error {
panic("Unlink called on non-directory inode")
}
// RmDir implements Inode.RmDir.
-func (InodeNotDirectory) RmDir(context.Context, string, *vfs.Dentry) error {
+func (InodeNotDirectory) RmDir(context.Context, string, *Dentry) error {
panic("RmDir called on non-directory inode")
}
// Rename implements Inode.Rename.
-func (InodeNotDirectory) Rename(context.Context, string, string, *vfs.Dentry, *vfs.Dentry) (*vfs.Dentry, error) {
+func (InodeNotDirectory) Rename(context.Context, string, string, *Dentry, *Dentry) (*Dentry, error) {
panic("Rename called on non-directory inode")
}
@@ -149,6 +155,8 @@ func (InodeNotDirectory) Valid(context.Context) bool {
// dymanic entries (i.e. entries that are not "hashed" into the
// vfs.Dentry.children) can embed this to provide no-op implementations for
// functions related to dynamic entries.
+//
+// +stateify savable
type InodeNoDynamicLookup struct{}
// Lookup implements Inode.Lookup.
@@ -169,6 +177,8 @@ func (InodeNoDynamicLookup) Valid(ctx context.Context) bool {
// InodeNotSymlink partially implements the Inode interface, specifically the
// inodeSymlink sub interface. All inodes that are not symlinks may embed this
// to return the appropriate errors from symlink-related functions.
+//
+// +stateify savable
type InodeNotSymlink struct{}
// Readlink implements Inode.Readlink.
@@ -186,6 +196,8 @@ func (InodeNotSymlink) Getlink(context.Context, *vfs.Mount) (vfs.VirtualDentry,
// inode attributes.
//
// Must be initialized by Init prior to first use.
+//
+// +stateify savable
type InodeAttrs struct {
devMajor uint32
devMinor uint32
@@ -330,13 +342,16 @@ func (a *InodeAttrs) DecLinks() {
}
}
+// +stateify savable
type slot struct {
Name string
- Dentry *vfs.Dentry
+ Dentry *Dentry
slotEntry
}
// OrderedChildrenOptions contains initialization options for OrderedChildren.
+//
+// +stateify savable
type OrderedChildrenOptions struct {
// Writable indicates whether vfs.FilesystemImpl methods implemented by
// OrderedChildren may modify the tracked children. This applies to
@@ -352,12 +367,14 @@ type OrderedChildrenOptions struct {
// directories.
//
// Must be initialize with Init before first use.
+//
+// +stateify savable
type OrderedChildren struct {
// Can children be modified by user syscalls? It set to false, interface
// methods that would modify the children return EPERM. Immutable.
writable bool
- mu sync.RWMutex
+ mu sync.RWMutex `state:"nosave"`
order slotList
set map[string]*slot
}
@@ -390,7 +407,7 @@ func (o *OrderedChildren) Populate(d *Dentry, children map[string]*Dentry) uint3
if child.isDir() {
links++
}
- if err := o.Insert(name, child.VFSDentry()); err != nil {
+ if err := o.Insert(name, child); err != nil {
panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v) into %+v", name, child, d))
}
d.InsertChild(name, child)
@@ -407,7 +424,7 @@ func (o *OrderedChildren) HasChildren() bool {
// Insert inserts child into o. This ignores the writability of o, as this is
// not part of the vfs.FilesystemImpl interface, and is a lower-level operation.
-func (o *OrderedChildren) Insert(name string, child *vfs.Dentry) error {
+func (o *OrderedChildren) Insert(name string, child *Dentry) error {
o.mu.Lock()
defer o.mu.Unlock()
if _, ok := o.set[name]; ok {
@@ -431,10 +448,10 @@ func (o *OrderedChildren) removeLocked(name string) {
}
// Precondition: caller must hold o.mu for writing.
-func (o *OrderedChildren) replaceChildLocked(name string, new *vfs.Dentry) *vfs.Dentry {
+func (o *OrderedChildren) replaceChildLocked(name string, new *Dentry) *Dentry {
if s, ok := o.set[name]; ok {
// Existing slot with given name, simply replace the dentry.
- var old *vfs.Dentry
+ var old *Dentry
old, s.Dentry = s.Dentry, new
return old
}
@@ -450,7 +467,7 @@ func (o *OrderedChildren) replaceChildLocked(name string, new *vfs.Dentry) *vfs.
}
// Precondition: caller must hold o.mu for reading or writing.
-func (o *OrderedChildren) checkExistingLocked(name string, child *vfs.Dentry) error {
+func (o *OrderedChildren) checkExistingLocked(name string, child *Dentry) error {
s, ok := o.set[name]
if !ok {
return syserror.ENOENT
@@ -462,7 +479,7 @@ func (o *OrderedChildren) checkExistingLocked(name string, child *vfs.Dentry) er
}
// Unlink implements Inode.Unlink.
-func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *vfs.Dentry) error {
+func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *Dentry) error {
if !o.writable {
return syserror.EPERM
}
@@ -478,12 +495,13 @@ func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *vfs.De
}
// Rmdir implements Inode.Rmdir.
-func (o *OrderedChildren) RmDir(ctx context.Context, name string, child *vfs.Dentry) error {
+func (o *OrderedChildren) RmDir(ctx context.Context, name string, child *Dentry) error {
// We're not responsible for checking that child is a directory, that it's
// empty, or updating any link counts; so this is the same as unlink.
return o.Unlink(ctx, name, child)
}
+// +stateify savable
type renameAcrossDifferentImplementationsError struct{}
func (renameAcrossDifferentImplementationsError) Error() string {
@@ -499,8 +517,8 @@ func (renameAcrossDifferentImplementationsError) Error() string {
// that will support Rename.
//
// Postcondition: reference on any replaced dentry transferred to caller.
-func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir *vfs.Dentry) (*vfs.Dentry, error) {
- dst, ok := dstDir.Impl().(*Dentry).inode.(interface{}).(*OrderedChildren)
+func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir *Dentry) (*Dentry, error) {
+ dst, ok := dstDir.inode.(interface{}).(*OrderedChildren)
if !ok {
return nil, renameAcrossDifferentImplementationsError{}
}
@@ -542,6 +560,8 @@ func (o *OrderedChildren) nthLocked(i int64) *slot {
}
// InodeSymlink partially implements Inode interface for symlinks.
+//
+// +stateify savable
type InodeSymlink struct {
InodeNotDirectory
}
@@ -615,6 +635,8 @@ func (s *StaticDirectory) DecRef(context.Context) {
}
// AlwaysValid partially implements kernfs.inodeDynamicLookup.
+//
+// +stateify savable
type AlwaysValid struct{}
// Valid implements kernfs.inodeDynamicLookup.Valid.
@@ -624,6 +646,8 @@ func (*AlwaysValid) Valid(context.Context) bool {
// InodeNoStatFS partially implements the Inode interface, where the client
// filesystem doesn't support statfs(2).
+//
+// +stateify savable
type InodeNoStatFS struct{}
// StatFS implements Inode.StatFS.
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 14bf43ede..6d3d79333 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -29,7 +29,7 @@
//
// Reference Model:
//
-// Kernfs dentries represents named pointers to inodes. Dentries and inode have
+// Kernfs dentries represents named pointers to inodes. Dentries and inodes have
// independent lifetimes and reference counts. A child dentry unconditionally
// holds a reference on its parent directory's dentry. A dentry also holds a
// reference on the inode it points to. Multiple dentries can point to the same
@@ -66,15 +66,17 @@ import (
// Filesystem mostly implements vfs.FilesystemImpl for a generic in-memory
// filesystem. Concrete implementations are expected to embed this in their own
// Filesystem type.
+//
+// +stateify savable
type Filesystem struct {
vfsfs vfs.Filesystem
- droppedDentriesMu sync.Mutex
+ droppedDentriesMu sync.Mutex `state:"nosave"`
// droppedDentries is a list of dentries waiting to be DecRef()ed. This is
// used to defer dentry destruction until mu can be acquired for
// writing. Protected by droppedDentriesMu.
- droppedDentries []*vfs.Dentry
+ droppedDentries []*Dentry
// mu synchronizes the lifetime of Dentries on this filesystem. Holding it
// for reading guarantees continued existence of any resolved dentries, but
@@ -97,7 +99,7 @@ type Filesystem struct {
// defer fs.mu.RUnlock()
// ...
// fs.deferDecRef(dentry)
- mu sync.RWMutex
+ mu sync.RWMutex `state:"nosave"`
// nextInoMinusOne is used to to allocate inode numbers on this
// filesystem. Must be accessed by atomic operations.
@@ -108,7 +110,7 @@ type Filesystem struct {
// processDeferredDecRefs{,Locked}. See comment on Filesystem.mu.
//
// Precondition: d must not already be pending destruction.
-func (fs *Filesystem) deferDecRef(d *vfs.Dentry) {
+func (fs *Filesystem) deferDecRef(d *Dentry) {
fs.droppedDentriesMu.Lock()
fs.droppedDentries = append(fs.droppedDentries, d)
fs.droppedDentriesMu.Unlock()
@@ -160,6 +162,8 @@ const (
// to, and child dentries hold a reference on their parent.
//
// Must be initialized by Init prior to first use.
+//
+// +stateify savable
type Dentry struct {
DentryRefs
@@ -173,7 +177,11 @@ type Dentry struct {
name string
// dirMu protects children and the names of child Dentries.
- dirMu sync.Mutex
+ //
+ // Note that holding fs.mu for writing is not sufficient;
+ // revalidateChildLocked(), which is a very hot path, may modify children with
+ // fs.mu acquired for reading only.
+ dirMu sync.Mutex `state:"nosave"`
children map[string]*Dentry
inode Inode
@@ -240,9 +248,8 @@ func (d *Dentry) Watches() *vfs.Watches {
func (d *Dentry) OnZeroWatches(context.Context) {}
// InsertChild inserts child into the vfs dentry cache with the given name under
-// this dentry. This does not update the directory inode, so calling this on
-// its own isn't sufficient to insert a child into a directory. InsertChild
-// updates the link count on d if required.
+// this dentry. This does not update the directory inode, so calling this on its
+// own isn't sufficient to insert a child into a directory.
//
// Precondition: d must represent a directory inode.
func (d *Dentry) InsertChild(name string, child *Dentry) {
@@ -254,10 +261,12 @@ func (d *Dentry) InsertChild(name string, child *Dentry) {
// InsertChildLocked is equivalent to InsertChild, with additional
// preconditions.
//
-// Precondition: d.dirMu must be locked.
+// Preconditions:
+// * d must represent a directory inode.
+// * d.dirMu must be locked.
func (d *Dentry) InsertChildLocked(name string, child *Dentry) {
if !d.isDir() {
- panic(fmt.Sprintf("InsertChild called on non-directory Dentry: %+v.", d))
+ panic(fmt.Sprintf("InsertChildLocked called on non-directory Dentry: %+v.", d))
}
d.IncRef() // DecRef in child's Dentry.destroy.
child.parent = d
@@ -273,7 +282,7 @@ func (d *Dentry) InsertChildLocked(name string, child *Dentry) {
// isn't sufficient to remove a child from a directory.
//
// Precondition: d must represent a directory inode.
-func (d *Dentry) RemoveChild(name string, child *vfs.Dentry) error {
+func (d *Dentry) RemoveChild(name string, child *Dentry) error {
d.dirMu.Lock()
defer d.dirMu.Unlock()
return d.RemoveChildLocked(name, child)
@@ -283,7 +292,7 @@ func (d *Dentry) RemoveChild(name string, child *vfs.Dentry) error {
// preconditions.
//
// Precondition: d.dirMu must be locked.
-func (d *Dentry) RemoveChildLocked(name string, child *vfs.Dentry) error {
+func (d *Dentry) RemoveChildLocked(name string, child *Dentry) error {
if !d.isDir() {
panic(fmt.Sprintf("RemoveChild called on non-directory Dentry: %+v.", d))
}
@@ -291,7 +300,7 @@ func (d *Dentry) RemoveChildLocked(name string, child *vfs.Dentry) error {
if !ok {
return syserror.ENOENT
}
- if &c.vfsd != child {
+ if c != child {
panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! Child: %+v, vfs: %+v", c, child))
}
delete(d.children, name)
@@ -318,7 +327,6 @@ func (d *Dentry) Inode() Inode {
//
// - Checking that dentries passed to methods are of the appropriate file type.
// - Checking permissions.
-// - Updating link and reference counts.
//
// Specific responsibilities of implementations are documented below.
type Inode interface {
@@ -328,7 +336,8 @@ type Inode interface {
inodeRefs
// Methods related to node metadata. A generic implementation is provided by
- // InodeAttrs.
+ // InodeAttrs. Note that a concrete filesystem using kernfs is responsible for
+ // managing link counts.
inodeMetadata
// Method for inodes that represent symlink. InodeNotSymlink provides a
@@ -346,7 +355,7 @@ type Inode interface {
// Open creates a file description for the filesystem object represented by
// this inode. The returned file description should hold a reference on the
- // inode for its lifetime.
+ // dentry for its lifetime.
//
// Precondition: rp.Done(). vfsd.Impl() must be the kernfs Dentry containing
// the inode on which Open() is being called.
@@ -400,30 +409,30 @@ type inodeDirectory interface {
HasChildren() bool
// NewFile creates a new regular file inode.
- NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*vfs.Dentry, error)
+ NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*Dentry, error)
// NewDir creates a new directory inode.
- NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error)
+ NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*Dentry, error)
// NewLink creates a new hardlink to a specified inode in this
// directory. Implementations should create a new kernfs Dentry pointing to
// target, and update target's link count.
- NewLink(ctx context.Context, name string, target Inode) (*vfs.Dentry, error)
+ NewLink(ctx context.Context, name string, target Inode) (*Dentry, error)
// NewSymlink creates a new symbolic link inode.
- NewSymlink(ctx context.Context, name, target string) (*vfs.Dentry, error)
+ NewSymlink(ctx context.Context, name, target string) (*Dentry, error)
// NewNode creates a new filesystem node for a mknod syscall.
- NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*vfs.Dentry, error)
+ NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*Dentry, error)
// Unlink removes a child dentry from this directory inode.
- Unlink(ctx context.Context, name string, child *vfs.Dentry) error
+ Unlink(ctx context.Context, name string, child *Dentry) error
// RmDir removes an empty child directory from this directory
// inode. Implementations must update the parent directory's link count,
// if required. Implementations are not responsible for checking that child
// is a directory, checking for an empty directory.
- RmDir(ctx context.Context, name string, child *vfs.Dentry) error
+ RmDir(ctx context.Context, name string, child *Dentry) error
// Rename is called on the source directory containing an inode being
// renamed. child should point to the resolved child in the source
@@ -431,7 +440,7 @@ type inodeDirectory interface {
// should return the replaced dentry or nil otherwise.
//
// Precondition: Caller must serialize concurrent calls to Rename.
- Rename(ctx context.Context, oldname, newname string, child, dstDir *vfs.Dentry) (replaced *vfs.Dentry, err error)
+ Rename(ctx context.Context, oldname, newname string, child, dstDir *Dentry) (replaced *Dentry, err error)
}
type inodeDynamicLookup interface {
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index c9f81d734..e413242dc 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -176,38 +176,36 @@ func (d *dir) DecRef(context.Context) {
d.dirRefs.DecRef(d.Destroy)
}
-func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error) {
+func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*kernfs.Dentry, error) {
creds := auth.CredentialsFromContext(ctx)
dir := d.fs.newDir(creds, opts.Mode, nil)
- dirVFSD := dir.VFSDentry()
- if err := d.OrderedChildren.Insert(name, dirVFSD); err != nil {
+ if err := d.OrderedChildren.Insert(name, dir); err != nil {
dir.DecRef(ctx)
return nil, err
}
d.IncLinks(1)
- return dirVFSD, nil
+ return dir, nil
}
-func (d *dir) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*vfs.Dentry, error) {
+func (d *dir) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*kernfs.Dentry, error) {
creds := auth.CredentialsFromContext(ctx)
f := d.fs.newFile(creds, "")
- fVFSD := f.VFSDentry()
- if err := d.OrderedChildren.Insert(name, fVFSD); err != nil {
+ if err := d.OrderedChildren.Insert(name, f); err != nil {
f.DecRef(ctx)
return nil, err
}
- return fVFSD, nil
+ return f, nil
}
-func (*dir) NewLink(context.Context, string, kernfs.Inode) (*vfs.Dentry, error) {
+func (*dir) NewLink(context.Context, string, kernfs.Inode) (*kernfs.Dentry, error) {
return nil, syserror.EPERM
}
-func (*dir) NewSymlink(context.Context, string, string) (*vfs.Dentry, error) {
+func (*dir) NewSymlink(context.Context, string, string) (*kernfs.Dentry, error) {
return nil, syserror.EPERM
}
-func (*dir) NewNode(context.Context, string, vfs.MknodOptions) (*vfs.Dentry, error) {
+func (*dir) NewNode(context.Context, string, vfs.MknodOptions) (*kernfs.Dentry, error) {
return nil, syserror.EPERM
}
diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go
index 443121c99..58a93eaac 100644
--- a/pkg/sentry/fsimpl/kernfs/symlink.go
+++ b/pkg/sentry/fsimpl/kernfs/symlink.go
@@ -24,6 +24,8 @@ import (
// StaticSymlink provides an Inode implementation for symlinks that point to
// a immutable target.
+//
+// +stateify savable
type StaticSymlink struct {
InodeAttrs
InodeNoopRefCount
diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
index 845de6c1d..ea7f073eb 100644
--- a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
+++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
@@ -41,12 +41,12 @@ type syntheticDirectory struct {
var _ Inode = (*syntheticDirectory)(nil)
-func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) *vfs.Dentry {
+func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) *Dentry {
inode := &syntheticDirectory{}
inode.Init(creds, 0 /* devMajor */, 0 /* devMinor */, 0 /* ino */, perm)
d := &Dentry{}
d.Init(inode)
- return &d.vfsd
+ return d
}
func (dir *syntheticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) {
@@ -69,12 +69,12 @@ func (dir *syntheticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath,
}
// NewFile implements Inode.NewFile.
-func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*vfs.Dentry, error) {
+func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*Dentry, error) {
return nil, syserror.EPERM
}
// NewDir implements Inode.NewDir.
-func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error) {
+func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*Dentry, error) {
if !opts.ForSyntheticMountpoint {
return nil, syserror.EPERM
}
@@ -87,16 +87,16 @@ func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs
}
// NewLink implements Inode.NewLink.
-func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (*vfs.Dentry, error) {
+func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (*Dentry, error) {
return nil, syserror.EPERM
}
// NewSymlink implements Inode.NewSymlink.
-func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (*vfs.Dentry, error) {
+func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (*Dentry, error) {
return nil, syserror.EPERM
}
// NewNode implements Inode.NewNode.
-func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*vfs.Dentry, error) {
+func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*Dentry, error) {
return nil, syserror.EPERM
}
diff --git a/pkg/sentry/fsimpl/overlay/directory.go b/pkg/sentry/fsimpl/overlay/directory.go
index 7ab42e71e..df4492346 100644
--- a/pkg/sentry/fsimpl/overlay/directory.go
+++ b/pkg/sentry/fsimpl/overlay/directory.go
@@ -100,12 +100,13 @@ func (d *dentry) collectWhiteoutsForRmdirLocked(ctx context.Context) (map[string
return whiteouts, readdirErr
}
+// +stateify savable
type directoryFD struct {
fileDescription
vfs.DirectoryFileDescriptionDefaultImpl
vfs.DentryMetadataFileDescriptionImpl
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
off int64
dirents []vfs.Dirent
}
@@ -116,10 +117,12 @@ func (fd *directoryFD) Release(ctx context.Context) {
// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
+ d := fd.dentry()
+ defer d.InotifyWithParent(ctx, linux.IN_ACCESS, 0, vfs.PathEvent)
+
fd.mu.Lock()
defer fd.mu.Unlock()
- d := fd.dentry()
if fd.dirents == nil {
ds, err := d.getDirents(ctx)
if err != nil {
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index e9ce4bde1..bd11372d5 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -499,7 +499,13 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
if err := create(parent, name, childLayer == lookupLayerUpperWhiteout); err != nil {
return err
}
+
parent.dirents = nil
+ ev := linux.IN_CREATE
+ if dir {
+ ev |= linux.IN_ISDIR
+ }
+ parent.watches.Notify(ctx, name, uint32(ev), 0 /* cookie */, vfs.InodeEvent, false /* unlinked */)
return nil
}
@@ -631,6 +637,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
}
return err
}
+ old.watches.Notify(ctx, "", linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent, false /* unlinked */)
return nil
})
}
@@ -975,6 +982,7 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
// just can't open it anymore for some reason.
return nil, err
}
+ parent.watches.Notify(ctx, childName, linux.IN_CREATE, 0 /* cookie */, vfs.PathEvent, false /* unlinked */)
return &fd.vfsfd, nil
}
@@ -1236,6 +1244,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
}
+ vfs.InotifyRename(ctx, &renamed.watches, &oldParent.watches, &newParent.watches, oldName, newName, renamed.isDir())
return nil
}
@@ -1352,6 +1361,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
delete(parent.children, name)
ds = appendDentry(ds, child)
parent.dirents = nil
+ parent.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0 /* cookie */, vfs.InodeEvent, true /* unlinked */)
return nil
}
@@ -1359,12 +1369,25 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
+ fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ return err
+ }
+ err = d.setStatLocked(ctx, rp, opts)
+ fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ if err != nil {
return err
}
+ if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
+ d.InotifyWithParent(ctx, ev, 0 /* cookie */, vfs.InodeEvent)
+ }
+ return nil
+}
+
+// Precondition: d.fs.renameMu must be held for reading.
+func (d *dentry) setStatLocked(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
mode := linux.FileMode(atomic.LoadUint32(&d.mode))
if err := vfs.CheckSetStat(ctx, rp.Credentials(), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
return err
@@ -1555,11 +1578,14 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to create whiteout during UnlinkAt: %v", err))
}
+ var cw *vfs.Watches
if child != nil {
vfsObj.CommitDeleteDentry(ctx, &child.vfsd)
delete(parent.children, name)
ds = appendDentry(ds, child)
+ cw = &child.watches
}
+ vfs.InotifyRemoveChild(ctx, cw, &parent.watches, name)
parent.dirents = nil
return nil
}
@@ -1636,13 +1662,20 @@ func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Crede
func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
+ fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
return err
}
- return fs.setXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), &opts)
+ err = fs.setXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), &opts)
+ fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ if err != nil {
+ return err
+ }
+
+ d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent)
+ return nil
}
// Precondition: fs.renameMu must be locked.
@@ -1673,13 +1706,20 @@ func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mo
func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
+ fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
return err
}
- return fs.removeXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), name)
+ err = fs.removeXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), name)
+ fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ if err != nil {
+ return err
+ }
+
+ d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent)
+ return nil
}
// Precondition: fs.renameMu must be locked.
diff --git a/pkg/sentry/fsimpl/overlay/non_directory.go b/pkg/sentry/fsimpl/overlay/non_directory.go
index 6e04705c7..853aee951 100644
--- a/pkg/sentry/fsimpl/overlay/non_directory.go
+++ b/pkg/sentry/fsimpl/overlay/non_directory.go
@@ -39,6 +39,7 @@ func (d *dentry) readlink(ctx context.Context) (string, error) {
})
}
+// +stateify savable
type nonDirectoryFD struct {
fileDescription
@@ -47,7 +48,7 @@ type nonDirectoryFD struct {
// fileDescription.dentry().upperVD. cachedFlags is the last known value of
// cachedFD.StatusFlags(). copiedUp, cachedFD, and cachedFlags are
// protected by mu.
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
copiedUp bool
cachedFD *vfs.FileDescription
cachedFlags uint32
@@ -183,6 +184,9 @@ func (fd *nonDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions)
return err
}
d.updateAfterSetStatLocked(&opts)
+ if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
+ d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
+ }
return nil
}
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index d0d26185e..dfbccd05f 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -51,6 +51,8 @@ import (
const Name = "overlay"
// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type FilesystemType struct{}
// Name implements vfs.FilesystemType.Name.
@@ -60,6 +62,8 @@ func (FilesystemType) Name() string {
// FilesystemOptions may be passed as vfs.GetFilesystemOptions.InternalData to
// FilesystemType.GetFilesystem.
+//
+// +stateify savable
type FilesystemOptions struct {
// Callers passing FilesystemOptions to
// overlay.FilesystemType.GetFilesystem() are responsible for ensuring that
@@ -76,6 +80,8 @@ type FilesystemOptions struct {
}
// filesystem implements vfs.FilesystemImpl.
+//
+// +stateify savable
type filesystem struct {
vfsfs vfs.Filesystem
@@ -98,7 +104,7 @@ type filesystem struct {
// renameMu synchronizes renaming with non-renaming operations in order to
// ensure consistent lock ordering between dentry.dirMu in different
// dentries.
- renameMu sync.RWMutex
+ renameMu sync.RWMutex `state:"nosave"`
// lastDirIno is the last inode number assigned to a directory. lastDirIno
// is accessed using atomic memory operations.
@@ -367,6 +373,8 @@ func (fs *filesystem) newDirIno() uint64 {
}
// dentry implements vfs.DentryImpl.
+//
+// +stateify savable
type dentry struct {
vfsd vfs.Dentry
@@ -399,7 +407,7 @@ type dentry struct {
// and dirents (if not nil) is a cache of dirents as returned by
// directoryFDs representing this directory. children is protected by
// dirMu.
- dirMu sync.Mutex
+ dirMu sync.Mutex `state:"nosave"`
children map[string]*dentry
dirents []vfs.Dirent
@@ -409,7 +417,7 @@ type dentry struct {
// If !upperVD.Ok(), it can transition to a valid vfs.VirtualDentry (i.e.
// be copied up) with copyMu locked for writing; otherwise, it is
// immutable. lowerVDs is always immutable.
- copyMu sync.RWMutex
+ copyMu sync.RWMutex `state:"nosave"`
upperVD vfs.VirtualDentry
lowerVDs []vfs.VirtualDentry
@@ -454,6 +462,13 @@ type dentry struct {
isMappable uint32
locks vfs.FileLocks
+
+ // watches is the set of inotify watches on the file repesented by this dentry.
+ //
+ // Note that hard links to the same file will not share the same set of
+ // watches, due to the fact that we do not have inode structures in this
+ // overlay implementation.
+ watches vfs.Watches
}
// newDentry creates a new dentry. The dentry initially has no references; it
@@ -513,6 +528,14 @@ func (d *dentry) checkDropLocked(ctx context.Context) {
if atomic.LoadInt64(&d.refs) != 0 {
return
}
+
+ // Make sure that we do not lose watches on dentries that have not been
+ // deleted. Note that overlayfs never calls VFS.InvalidateDentry(), so
+ // d.vfsd.IsDead() indicates that d was deleted.
+ if !d.vfsd.IsDead() && d.watches.Size() > 0 {
+ return
+ }
+
// Refs is still zero; destroy it.
d.destroyLocked(ctx)
return
@@ -541,6 +564,8 @@ func (d *dentry) destroyLocked(ctx context.Context) {
lowerVD.DecRef(ctx)
}
+ d.watches.HandleDeletion(ctx)
+
if d.parent != nil {
d.parent.dirMu.Lock()
if !d.vfsd.IsDead() {
@@ -559,19 +584,36 @@ func (d *dentry) destroyLocked(ctx context.Context) {
// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
func (d *dentry) InotifyWithParent(ctx context.Context, events uint32, cookie uint32, et vfs.EventType) {
- // TODO(gvisor.dev/issue/1479): Implement inotify.
+ if d.isDir() {
+ events |= linux.IN_ISDIR
+ }
+
+ // overlayfs never calls VFS.InvalidateDentry(), so d.vfsd.IsDead() indicates
+ // that d was deleted.
+ deleted := d.vfsd.IsDead()
+
+ d.fs.renameMu.RLock()
+ // The ordering below is important, Linux always notifies the parent first.
+ if d.parent != nil {
+ d.parent.watches.Notify(ctx, d.name, events, cookie, et, deleted)
+ }
+ d.watches.Notify(ctx, "", events, cookie, et, deleted)
+ d.fs.renameMu.RUnlock()
}
// Watches implements vfs.DentryImpl.Watches.
func (d *dentry) Watches() *vfs.Watches {
- // TODO(gvisor.dev/issue/1479): Implement inotify.
- return nil
+ return &d.watches
}
// OnZeroWatches implements vfs.DentryImpl.OnZeroWatches.
-//
-// TODO(gvisor.dev/issue/1479): Implement inotify.
-func (d *dentry) OnZeroWatches(context.Context) {}
+func (d *dentry) OnZeroWatches(ctx context.Context) {
+ if atomic.LoadInt64(&d.refs) == 0 {
+ d.fs.renameMu.Lock()
+ d.checkDropLocked(ctx)
+ d.fs.renameMu.Unlock()
+ }
+}
// iterLayers invokes yield on each layer comprising d, from top to bottom. If
// any call to yield returns false, iterLayer stops iteration.
@@ -652,6 +694,8 @@ func (d *dentry) updateAfterSetStatLocked(opts *vfs.SetStatOptions) {
// fileDescription is embedded by overlay implementations of
// vfs.FileDescriptionImpl.
+//
+// +stateify savable
type fileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
@@ -679,17 +723,33 @@ func (fd *fileDescription) GetXattr(ctx context.Context, opts vfs.GetXattrOption
// SetXattr implements vfs.FileDescriptionImpl.SetXattr.
func (fd *fileDescription) SetXattr(ctx context.Context, opts vfs.SetXattrOptions) error {
fs := fd.filesystem()
+ d := fd.dentry()
+
fs.renameMu.RLock()
- defer fs.renameMu.RUnlock()
- return fs.setXattrLocked(ctx, fd.dentry(), fd.vfsfd.Mount(), auth.CredentialsFromContext(ctx), &opts)
+ err := fs.setXattrLocked(ctx, d, fd.vfsfd.Mount(), auth.CredentialsFromContext(ctx), &opts)
+ fs.renameMu.RUnlock()
+ if err != nil {
+ return err
+ }
+
+ d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
+ return nil
}
// RemoveXattr implements vfs.FileDescriptionImpl.RemoveXattr.
func (fd *fileDescription) RemoveXattr(ctx context.Context, name string) error {
fs := fd.filesystem()
+ d := fd.dentry()
+
fs.renameMu.RLock()
- defer fs.renameMu.RUnlock()
- return fs.removeXattrLocked(ctx, fd.dentry(), fd.vfsfd.Mount(), auth.CredentialsFromContext(ctx), name)
+ err := fs.removeXattrLocked(ctx, d, fd.vfsfd.Mount(), auth.CredentialsFromContext(ctx), name)
+ fs.renameMu.RUnlock()
+ if err != nil {
+ return err
+ }
+
+ d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
+ return nil
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go
index 33e247578..4e2da4810 100644
--- a/pkg/sentry/fsimpl/pipefs/pipefs.go
+++ b/pkg/sentry/fsimpl/pipefs/pipefs.go
@@ -31,6 +31,7 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
+// +stateify savable
type filesystemType struct{}
// Name implements vfs.FilesystemType.Name.
@@ -43,6 +44,7 @@ func (filesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFile
panic("pipefs.filesystemType.GetFilesystem should never be called")
}
+// +stateify savable
type filesystem struct {
kernfs.Filesystem
@@ -76,6 +78,8 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe
}
// inode implements kernfs.Inode.
+//
+// +stateify savable
type inode struct {
kernfs.InodeNotDirectory
kernfs.InodeNotSymlink
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
index 03b5941b9..05d7948ea 100644
--- a/pkg/sentry/fsimpl/proc/filesystem.go
+++ b/pkg/sentry/fsimpl/proc/filesystem.go
@@ -41,6 +41,7 @@ func (FilesystemType) Name() string {
return Name
}
+// +stateify savable
type filesystem struct {
kernfs.Filesystem
@@ -84,6 +85,8 @@ func (fs *filesystem) Release(ctx context.Context) {
// dynamicInode is an overfitted interface for common Inodes with
// dynamicByteSource types used in procfs.
+//
+// +stateify savable
type dynamicInode interface {
kernfs.Inode
vfs.DynamicBytesSource
@@ -99,6 +102,7 @@ func (fs *filesystem) newDentry(creds *auth.Credentials, ino uint64, perm linux.
return d
}
+// +stateify savable
type staticFile struct {
kernfs.DynamicBytesFile
vfs.StaticData
@@ -118,10 +122,13 @@ func newStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64
// InternalData contains internal data passed in to the procfs mount via
// vfs.GetFilesystemOptions.InternalData.
+//
+// +stateify savable
type InternalData struct {
Cgroups map[string]string
}
+// +stateify savable
type implStatFS struct{}
// StatFS implements kernfs.Inode.StatFS.
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index 47dc0ac9a..47ecd941c 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -116,6 +116,7 @@ func (i *subtasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallb
return offset, nil
}
+// +stateify savable
type subtasksFD struct {
kernfs.GenericDirectoryFD
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index ae461bb48..1f99183eb 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -129,6 +129,8 @@ func (i *taskInode) DecRef(context.Context) {
// taskOwnedInode implements kernfs.Inode and overrides inode owner with task
// effective user and group.
+//
+// +stateify savable
type taskOwnedInode struct {
kernfs.Inode
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index c7104458f..0866cea2b 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -51,6 +51,7 @@ func taskFDExists(ctx context.Context, t *kernel.Task, fd int32) bool {
return true
}
+// +stateify savable
type fdDir struct {
locks vfs.FileLocks
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index feed5bc3f..b81c8279e 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -785,6 +785,7 @@ func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
return nil
}
+// +stateify savable
type namespaceSymlink struct {
kernfs.StaticSymlink
@@ -832,6 +833,8 @@ func (s *namespaceSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.Vir
// namespaceInode is a synthetic inode created to represent a namespace in
// /proc/[pid]/ns/*.
+//
+// +stateify savable
type namespaceInode struct {
implStatFS
kernfs.InodeAttrs
@@ -865,6 +868,8 @@ func (i *namespaceInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *ker
// namespace FD is a synthetic file that represents a namespace in
// /proc/[pid]/ns/*.
+//
+// +stateify savable
type namespaceFD struct {
vfs.FileDescriptionDefaultImpl
vfs.LockFD
diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go
index 1607eac19..e7f748655 100644
--- a/pkg/sentry/fsimpl/proc/task_net.go
+++ b/pkg/sentry/fsimpl/proc/task_net.go
@@ -616,6 +616,7 @@ type netSnmpData struct {
var _ dynamicInode = (*netSnmpData)(nil)
+// +stateify savable
type snmpLine struct {
prefix string
header string
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index ad08c3626..d8f5dd509 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -236,6 +236,8 @@ func (i *tasksInode) DecRef(context.Context) {
// staticFileSetStat implements a special static file that allows inode
// attributes to be set. This is to support /proc files that are readonly, but
// allow attributes to be set.
+//
+// +stateify savable
type staticFileSetStat struct {
dynamicBytesFileSetAttr
vfs.StaticData
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
index 459a8e52e..f268c59b0 100644
--- a/pkg/sentry/fsimpl/proc/tasks_files.go
+++ b/pkg/sentry/fsimpl/proc/tasks_files.go
@@ -31,6 +31,7 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
+// +stateify savable
type selfSymlink struct {
implStatFS
kernfs.InodeAttrs
@@ -74,6 +75,7 @@ func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials,
return syserror.EPERM
}
+// +stateify savable
type threadSelfSymlink struct {
implStatFS
kernfs.InodeAttrs
@@ -121,6 +123,8 @@ func (*threadSelfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Creden
// dynamicBytesFileSetAttr implements a special file that allows inode
// attributes to be set. This is to support /proc files that are readonly, but
// allow attributes to be set.
+//
+// +stateify savable
type dynamicBytesFileSetAttr struct {
kernfs.DynamicBytesFile
}
@@ -131,6 +135,8 @@ func (d *dynamicBytesFileSetAttr) SetStat(ctx context.Context, fs *vfs.Filesyste
}
// cpuStats contains the breakdown of CPU time for /proc/stat.
+//
+// +stateify savable
type cpuStats struct {
// user is time spent in userspace tasks with non-positive niceness.
user uint64
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index a3ffbb15e..3312b0418 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -31,6 +31,7 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
+// +stateify savable
type tcpMemDir int
const (
diff --git a/pkg/sentry/fsimpl/signalfd/signalfd.go b/pkg/sentry/fsimpl/signalfd/signalfd.go
index 3c02af8c9..bf11b425a 100644
--- a/pkg/sentry/fsimpl/signalfd/signalfd.go
+++ b/pkg/sentry/fsimpl/signalfd/signalfd.go
@@ -27,6 +27,8 @@ import (
)
// SignalFileDescription implements vfs.FileDescriptionImpl for signal fds.
+//
+// +stateify savable
type SignalFileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
@@ -43,7 +45,7 @@ type SignalFileDescription struct {
target *kernel.Task
// mu protects mask.
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
// mask is the signal mask. Protected by mu.
mask linux.SignalSet
diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go
index f1e75e277..29e5371d6 100644
--- a/pkg/sentry/fsimpl/sockfs/sockfs.go
+++ b/pkg/sentry/fsimpl/sockfs/sockfs.go
@@ -28,6 +28,8 @@ import (
)
// filesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type filesystemType struct{}
// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
@@ -44,6 +46,7 @@ func (filesystemType) Name() string {
return "sockfs"
}
+// +stateify savable
type filesystem struct {
kernfs.Filesystem
@@ -80,6 +83,8 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe
}
// inode implements kernfs.Inode.
+//
+// +stateify savable
type inode struct {
kernfs.InodeAttrs
kernfs.InodeNoopRefCount
diff --git a/pkg/sentry/fsimpl/sys/kcov.go b/pkg/sentry/fsimpl/sys/kcov.go
index 252f75d26..b75d70ae6 100644
--- a/pkg/sentry/fsimpl/sys/kcov.go
+++ b/pkg/sentry/fsimpl/sys/kcov.go
@@ -36,6 +36,8 @@ func (fs *filesystem) newKcovFile(ctx context.Context, creds *auth.Credentials)
}
// kcovInode implements kernfs.Inode.
+//
+// +stateify savable
type kcovInode struct {
kernfs.InodeAttrs
kernfs.InodeNoopRefCount
@@ -63,6 +65,7 @@ func (i *kcovInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.D
return &fd.vfsfd, nil
}
+// +stateify savable
type kcovFD struct {
vfs.FileDescriptionDefaultImpl
vfs.NoLockFD
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index 8bcefc103..1568c581f 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -34,9 +34,13 @@ const Name = "sysfs"
const defaultSysDirMode = linux.FileMode(0755)
// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type FilesystemType struct{}
// filesystem implements vfs.FilesystemImpl.
+//
+// +stateify savable
type filesystem struct {
kernfs.Filesystem
@@ -117,6 +121,8 @@ func (fs *filesystem) Release(ctx context.Context) {
}
// dir implements kernfs.Inode.
+//
+// +stateify savable
type dir struct {
dirRefs
kernfs.InodeAttrs
@@ -169,6 +175,8 @@ func (d *dir) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, err
}
// cpuFile implements kernfs.Inode.
+//
+// +stateify savable
type cpuFile struct {
implStatFS
kernfs.DynamicBytesFile
@@ -190,6 +198,7 @@ func (fs *filesystem) newCPUFile(creds *auth.Credentials, maxCores uint, mode li
return d
}
+// +stateify savable
type implStatFS struct{}
// StatFS implements kernfs.Inode.StatFS.
diff --git a/pkg/sentry/fsimpl/timerfd/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go
index ac8a4e3bb..8853c8ad2 100644
--- a/pkg/sentry/fsimpl/timerfd/timerfd.go
+++ b/pkg/sentry/fsimpl/timerfd/timerfd.go
@@ -28,6 +28,8 @@ import (
// TimerFileDescription implements vfs.FileDescriptionImpl for timer fds. It also
// implements ktime.TimerListener.
+//
+// +stateify savable
type TimerFileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
diff --git a/pkg/sentry/fsimpl/tmpfs/device_file.go b/pkg/sentry/fsimpl/tmpfs/device_file.go
index ac54d420d..9129d35b7 100644
--- a/pkg/sentry/fsimpl/tmpfs/device_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/device_file.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs"
)
+// +stateify savable
type deviceFile struct {
inode inode
kind vfs.DeviceKind
diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go
index 070c75e68..e90669cf0 100644
--- a/pkg/sentry/fsimpl/tmpfs/directory.go
+++ b/pkg/sentry/fsimpl/tmpfs/directory.go
@@ -25,6 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/syserror"
)
+// +stateify savable
type directory struct {
// Since directories can't be hard-linked, each directory can only be
// associated with a single dentry, which we can store in the directory
@@ -44,7 +45,7 @@ type directory struct {
// (with inode == nil) that represent the iteration position of
// directoryFDs. childList is used to support directoryFD.IterDirents()
// efficiently. childList is protected by iterMu.
- iterMu sync.Mutex
+ iterMu sync.Mutex `state:"nosave"`
childList dentryList
}
@@ -86,6 +87,7 @@ func (dir *directory) mayDelete(creds *auth.Credentials, child *dentry) error {
return vfs.CheckDeleteSticky(creds, linux.FileMode(atomic.LoadUint32(&dir.inode.mode)), auth.KUID(atomic.LoadUint32(&child.inode.uid)))
}
+// +stateify savable
type directoryFD struct {
fileDescription
vfs.DirectoryFileDescriptionDefaultImpl
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 1362c1602..e39cd305b 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -673,11 +673,11 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
fs.mu.RUnlock()
return err
}
- if err := d.inode.setStat(ctx, rp.Credentials(), &opts); err != nil {
- fs.mu.RUnlock()
+ err = d.inode.setStat(ctx, rp.Credentials(), &opts)
+ fs.mu.RUnlock()
+ if err != nil {
return err
}
- fs.mu.RUnlock()
if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
@@ -822,11 +822,11 @@ func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
fs.mu.RUnlock()
return err
}
- if err := d.inode.setXattr(rp.Credentials(), &opts); err != nil {
- fs.mu.RUnlock()
+ err = d.inode.setXattr(rp.Credentials(), &opts)
+ fs.mu.RUnlock()
+ if err != nil {
return err
}
- fs.mu.RUnlock()
d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
return nil
@@ -840,11 +840,11 @@ func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath,
fs.mu.RUnlock()
return err
}
- if err := d.inode.removeXattr(rp.Credentials(), name); err != nil {
- fs.mu.RUnlock()
+ err = d.inode.removeXattr(rp.Credentials(), name)
+ fs.mu.RUnlock()
+ if err != nil {
return err
}
- fs.mu.RUnlock()
d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
return nil
diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go
index 5b0471ff4..d772db9e9 100644
--- a/pkg/sentry/fsimpl/tmpfs/named_pipe.go
+++ b/pkg/sentry/fsimpl/tmpfs/named_pipe.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
+// +stateify savable
type namedPipe struct {
inode inode
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index b8699d064..a199eb33d 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -36,6 +36,8 @@ import (
)
// regularFile is a regular (=S_IFREG) tmpfs file.
+//
+// +stateify savable
type regularFile struct {
inode inode
@@ -66,7 +68,7 @@ type regularFile struct {
writableMappingPages uint64
// dataMu protects the fields below.
- dataMu sync.RWMutex
+ dataMu sync.RWMutex `state:"nosave"`
// data maps offsets into the file to offsets into memFile that store
// the file's data.
@@ -325,13 +327,14 @@ func (*regularFile) InvalidateUnsavable(context.Context) error {
return nil
}
+// +stateify savable
type regularFileFD struct {
fileDescription
// off is the file offset. off is accessed using atomic memory operations.
// offMu serializes operations that may mutate off.
off int64
- offMu sync.Mutex
+ offMu sync.Mutex `state:"nosave"`
}
// Release implements vfs.FileDescriptionImpl.Release.
diff --git a/pkg/sentry/fsimpl/tmpfs/socket_file.go b/pkg/sentry/fsimpl/tmpfs/socket_file.go
index 3ed650474..5699d5975 100644
--- a/pkg/sentry/fsimpl/tmpfs/socket_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/socket_file.go
@@ -21,6 +21,8 @@ import (
)
// socketFile is a socket (=S_IFSOCK) tmpfs file.
+//
+// +stateify savable
type socketFile struct {
inode inode
ep transport.BoundEndpoint
diff --git a/pkg/sentry/fsimpl/tmpfs/symlink.go b/pkg/sentry/fsimpl/tmpfs/symlink.go
index b0de5fabe..a102a2ee2 100644
--- a/pkg/sentry/fsimpl/tmpfs/symlink.go
+++ b/pkg/sentry/fsimpl/tmpfs/symlink.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
+// +stateify savable
type symlink struct {
inode inode
target string // immutable
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 4658e1533..cefec8fde 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -51,9 +51,13 @@ import (
const Name = "tmpfs"
// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type FilesystemType struct{}
// filesystem implements vfs.FilesystemImpl.
+//
+// +stateify savable
type filesystem struct {
vfsfs vfs.Filesystem
@@ -67,7 +71,7 @@ type filesystem struct {
devMinor uint32
// mu serializes changes to the Dentry tree.
- mu sync.RWMutex
+ mu sync.RWMutex `state:"nosave"`
nextInoMinusOne uint64 // accessed using atomic memory operations
}
@@ -78,6 +82,8 @@ func (FilesystemType) Name() string {
}
// FilesystemOpts is used to pass configuration data to tmpfs.
+//
+// +stateify savable
type FilesystemOpts struct {
// RootFileType is the FileType of the filesystem root. Valid values
// are: S_IFDIR, S_IFREG, and S_IFLNK. Defaults to S_IFDIR.
@@ -221,6 +227,8 @@ var globalStatfs = linux.Statfs{
}
// dentry implements vfs.DentryImpl.
+//
+// +stateify savable
type dentry struct {
vfsd vfs.Dentry
@@ -300,6 +308,8 @@ func (d *dentry) Watches() *vfs.Watches {
func (d *dentry) OnZeroWatches(context.Context) {}
// inode represents a filesystem object.
+//
+// +stateify savable
type inode struct {
// fs is the owning filesystem. fs is immutable.
fs *filesystem
@@ -316,12 +326,12 @@ type inode struct {
// Inode metadata. Writing multiple fields atomically requires holding
// mu, othewise atomic operations can be used.
- mu sync.Mutex
- mode uint32 // file type and mode
- nlink uint32 // protected by filesystem.mu instead of inode.mu
- uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
- gid uint32 // auth.KGID, but ...
- ino uint64 // immutable
+ mu sync.Mutex `state:"nosave"`
+ mode uint32 // file type and mode
+ nlink uint32 // protected by filesystem.mu instead of inode.mu
+ uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
+ gid uint32 // auth.KGID, but ...
+ ino uint64 // immutable
// Linux's tmpfs has no concept of btime.
atime int64 // nanoseconds
@@ -668,6 +678,8 @@ func (i *inode) checkXattrPermissions(creds *auth.Credentials, name string, ats
// fileDescription is embedded by tmpfs implementations of
// vfs.FileDescriptionImpl.
+//
+// +stateify savable
type fileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index a81e7f714..3129f290d 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -74,9 +74,13 @@ var noCrashOnVerificationFailure bool
var verityMu sync.RWMutex
// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
type FilesystemType struct{}
// filesystem implements vfs.FilesystemImpl.
+//
+// +stateify savable
type filesystem struct {
vfsfs vfs.Filesystem
@@ -101,11 +105,13 @@ type filesystem struct {
// renameMu synchronizes renaming with non-renaming operations in order
// to ensure consistent lock ordering between dentry.dirMu in different
// dentries.
- renameMu sync.RWMutex
+ renameMu sync.RWMutex `state:"nosave"`
}
// InternalFilesystemOptions may be passed as
// vfs.GetFilesystemOptions.InternalData to FilesystemType.GetFilesystem.
+//
+// +stateify savable
type InternalFilesystemOptions struct {
// RootMerkleFileName is the name of the verity root Merkle tree file.
RootMerkleFileName string
@@ -259,6 +265,8 @@ func (fs *filesystem) Release(ctx context.Context) {
}
// dentry implements vfs.DentryImpl.
+//
+// +stateify savable
type dentry struct {
vfsd vfs.Dentry
@@ -285,7 +293,7 @@ type dentry struct {
// and dirents (if not nil) is a cache of dirents as returned by
// directoryFDs representing this directory. children is protected by
// dirMu.
- dirMu sync.Mutex
+ dirMu sync.Mutex `state:"nosave"`
children map[string]*dentry
// lowerVD is the VirtualDentry in the underlying file system.
@@ -429,6 +437,8 @@ func (d *dentry) readlink(ctx context.Context) (string, error) {
// FileDescription is a wrapper of the underlying lowerFD, with support to build
// Merkle trees through the Linux fs-verity API to verify contents read from
// lowerFD.
+//
+// +stateify savable
type fileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
diff --git a/pkg/sentry/kernel/kcov.go b/pkg/sentry/kernel/kcov.go
index aad63aa99..d3e76ca7b 100644
--- a/pkg/sentry/kernel/kcov.go
+++ b/pkg/sentry/kernel/kcov.go
@@ -89,6 +89,10 @@ func (kcov *Kcov) TaskWork(t *Task) {
kcov.mu.Lock()
defer kcov.mu.Unlock()
+ if kcov.mode != linux.KCOV_TRACE_PC {
+ return
+ }
+
rw := &kcovReadWriter{
mf: kcov.mfp.MemoryFile(),
fr: kcov.mappable.FileRange(),
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index d9c62ff91..d6c21adb7 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -514,6 +514,10 @@ func (k *Kernel) SaveTo(w wire.Writer) error {
// flushMountSourceRefs flushes the MountSources for all mounted filesystems
// and open FDs.
func (k *Kernel) flushMountSourceRefs(ctx context.Context) error {
+ if VFS2Enabled {
+ return nil // Not relevant.
+ }
+
// Flush all mount sources for currently mounted filesystems in each task.
flushed := make(map[*fs.MountNamespace]struct{})
k.tasks.mu.RLock()
@@ -540,11 +544,6 @@ func (k *Kernel) flushMountSourceRefs(ctx context.Context) error {
//
// Precondition: Must be called with the kernel paused.
func (ts *TaskSet) forEachFDPaused(ctx context.Context, f func(*fs.File, *vfs.FileDescription) error) (err error) {
- // TODO(gvisor.dev/issue/1663): Add save support for VFS2.
- if VFS2Enabled {
- return nil
- }
-
ts.mu.RLock()
defer ts.mu.RUnlock()
for t := range ts.Root.tids {
@@ -563,6 +562,10 @@ func (ts *TaskSet) forEachFDPaused(ctx context.Context, f func(*fs.File, *vfs.Fi
func (ts *TaskSet) flushWritesToFiles(ctx context.Context) error {
// TODO(gvisor.dev/issue/1663): Add save support for VFS2.
+ if VFS2Enabled {
+ return nil
+ }
+
return ts.forEachFDPaused(ctx, func(file *fs.File, _ *vfs.FileDescription) error {
if flags := file.Flags(); !flags.Write {
return nil
diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go
index e5acbac50..163af329b 100644
--- a/pkg/sentry/socket/hostinet/socket_vfs2.go
+++ b/pkg/sentry/socket/hostinet/socket_vfs2.go
@@ -52,6 +52,7 @@ var _ = socket.SocketVFS2(&socketVFS2{})
func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol int, fd int, flags uint32) (*vfs.FileDescription, *syserr.Error) {
mnt := t.Kernel().SocketMount()
d := sockfs.NewDentry(t.Credentials(), mnt)
+ defer d.DecRef(t)
s := &socketVFS2{
socketOpsCommon: socketOpsCommon{
diff --git a/pkg/sentry/socket/netlink/provider_vfs2.go b/pkg/sentry/socket/netlink/provider_vfs2.go
index bb205be0d..e8930f031 100644
--- a/pkg/sentry/socket/netlink/provider_vfs2.go
+++ b/pkg/sentry/socket/netlink/provider_vfs2.go
@@ -52,6 +52,7 @@ func (*socketProviderVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol
vfsfd := &s.vfsfd
mnt := t.Kernel().SocketMount()
d := sockfs.NewDentry(t.Credentials(), mnt)
+ defer d.DecRef(t)
if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{
DenyPRead: true,
DenyPWrite: true,
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index 59f6fe44d..3345124cc 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -37,6 +37,8 @@ import (
// SocketVFS2 implements socket.SocketVFS2 (and by extension,
// vfs.FileDescriptionImpl) for Unix sockets.
+//
+// +stateify savable
type SocketVFS2 struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
@@ -54,6 +56,7 @@ var _ = socket.SocketVFS2(&SocketVFS2{})
func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) {
mnt := t.Kernel().SocketMount()
d := sockfs.NewDentry(t.Credentials(), mnt)
+ defer d.DecRef(t)
fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &vfs.FileLocks{})
if err != nil {
diff --git a/pkg/sentry/state/state.go b/pkg/sentry/state/state.go
index a06c9b8ab..245d2c5cf 100644
--- a/pkg/sentry/state/state.go
+++ b/pkg/sentry/state/state.go
@@ -61,8 +61,10 @@ func (opts SaveOpts) Save(k *kernel.Kernel, w *watchdog.Watchdog) error {
log.Infof("Sandbox save started, pausing all tasks.")
k.Pause()
k.ReceiveTaskStates()
- defer k.Unpause()
- defer log.Infof("Tasks resumed after save.")
+ defer func() {
+ k.Unpause()
+ log.Infof("Tasks resumed after save.")
+ }()
w.Stop()
defer w.Start()
diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go
index 9c4db3047..bdfd3ca8f 100644
--- a/pkg/sentry/vfs/anonfs.go
+++ b/pkg/sentry/vfs/anonfs.go
@@ -52,6 +52,8 @@ const (
)
// anonFilesystemType implements FilesystemType.
+//
+// +stateify savable
type anonFilesystemType struct{}
// GetFilesystem implements FilesystemType.GetFilesystem.
@@ -69,12 +71,15 @@ func (anonFilesystemType) Name() string {
//
// Since all Dentries in anonFilesystem are non-directories, all FilesystemImpl
// methods that would require an anonDentry to be a directory return ENOTDIR.
+//
+// +stateify savable
type anonFilesystem struct {
vfsfs Filesystem
devMinor uint32
}
+// +stateify savable
type anonDentry struct {
vfsd Dentry
diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go
index a69a5b2f1..320ab7ce1 100644
--- a/pkg/sentry/vfs/dentry.go
+++ b/pkg/sentry/vfs/dentry.go
@@ -89,6 +89,8 @@ func (d *Dentry) Impl() DentryImpl {
// DentryImpl contains implementation details for a Dentry. Implementations of
// DentryImpl should contain their associated Dentry by value as their first
// field.
+//
+// +stateify savable
type DentryImpl interface {
// IncRef increments the Dentry's reference count. A Dentry with a non-zero
// reference count must remain coherent with the state of the filesystem.
diff --git a/pkg/sentry/vfs/device.go b/pkg/sentry/vfs/device.go
index 1e9dffc8f..dde2ad79b 100644
--- a/pkg/sentry/vfs/device.go
+++ b/pkg/sentry/vfs/device.go
@@ -22,6 +22,8 @@ import (
)
// DeviceKind indicates whether a device is a block or character device.
+//
+// +stateify savable
type DeviceKind uint32
const (
@@ -44,6 +46,7 @@ func (kind DeviceKind) String() string {
}
}
+// +stateify savable
type devTuple struct {
kind DeviceKind
major uint32
diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go
index 754e76aec..8f36c3e3b 100644
--- a/pkg/sentry/vfs/epoll.go
+++ b/pkg/sentry/vfs/epoll.go
@@ -27,6 +27,8 @@ import (
var epollCycleMu sync.Mutex
// EpollInstance represents an epoll instance, as described by epoll(7).
+//
+// +stateify savable
type EpollInstance struct {
vfsfd FileDescription
FileDescriptionDefaultImpl
@@ -38,11 +40,11 @@ type EpollInstance struct {
// interest is the set of file descriptors that are registered with the
// EpollInstance for monitoring. interest is protected by interestMu.
- interestMu sync.Mutex
+ interestMu sync.Mutex `state:"nosave"`
interest map[epollInterestKey]*epollInterest
// mu protects fields in registered epollInterests.
- mu sync.Mutex
+ mu sync.Mutex `state:"nosave"`
// ready is the set of file descriptors that may be "ready" for I/O. Note
// that this must be an ordered list, not a map: "If more than maxevents
@@ -55,6 +57,7 @@ type EpollInstance struct {
ready epollInterestList
}
+// +stateify savable
type epollInterestKey struct {
// file is the registered FileDescription. No reference is held on file;
// instead, when the last reference is dropped, FileDescription.DecRef()
@@ -67,6 +70,8 @@ type epollInterestKey struct {
}
// epollInterest represents an EpollInstance's interest in a file descriptor.
+//
+// +stateify savable
type epollInterest struct {
// epoll is the owning EpollInstance. epoll is immutable.
epoll *EpollInstance
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 73bb36d3e..1eba0270f 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -37,11 +37,13 @@ import (
// FileDescription methods require that a reference is held.
//
// FileDescription is analogous to Linux's struct file.
+//
+// +stateify savable
type FileDescription struct {
FileDescriptionRefs
// flagsMu protects statusFlags and asyncHandler below.
- flagsMu sync.Mutex
+ flagsMu sync.Mutex `state:"nosave"`
// statusFlags contains status flags, "initialized by open(2) and possibly
// modified by fcntl()" - fcntl(2). statusFlags can be read using atomic
@@ -56,7 +58,7 @@ type FileDescription struct {
// epolls is the set of epollInterests registered for this FileDescription.
// epolls is protected by epollMu.
- epollMu sync.Mutex
+ epollMu sync.Mutex `state:"nosave"`
epolls map[*epollInterest]struct{}
// vd is the filesystem location at which this FileDescription was opened.
@@ -88,6 +90,8 @@ type FileDescription struct {
}
// FileDescriptionOptions contains options to FileDescription.Init().
+//
+// +stateify savable
type FileDescriptionOptions struct {
// If AllowDirectIO is true, allow O_DIRECT to be set on the file.
AllowDirectIO bool
@@ -451,6 +455,8 @@ type FileDescriptionImpl interface {
}
// Dirent holds the information contained in struct linux_dirent64.
+//
+// +stateify savable
type Dirent struct {
// Name is the filename.
Name string
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index 78da16bac..48ca9de44 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -42,6 +42,8 @@ import (
// FileDescriptionDefaultImpl may be embedded by implementations of
// FileDescriptionImpl to obtain implementations of many FileDescriptionImpl
// methods with default behavior analogous to Linux's.
+//
+// +stateify savable
type FileDescriptionDefaultImpl struct{}
// OnClose implements FileDescriptionImpl.OnClose analogously to
@@ -166,6 +168,8 @@ func (FileDescriptionDefaultImpl) RemoveXattr(ctx context.Context, name string)
// DirectoryFileDescriptionDefaultImpl may be embedded by implementations of
// FileDescriptionImpl that always represent directories to obtain
// implementations of non-directory I/O methods that return EISDIR.
+//
+// +stateify savable
type DirectoryFileDescriptionDefaultImpl struct{}
// Allocate implements DirectoryFileDescriptionDefaultImpl.Allocate.
@@ -196,6 +200,8 @@ func (DirectoryFileDescriptionDefaultImpl) Write(ctx context.Context, src userme
// DentryMetadataFileDescriptionImpl may be embedded by implementations of
// FileDescriptionImpl for which FileDescriptionOptions.UseDentryMetadata is
// true to obtain implementations of Stat and SetStat that panic.
+//
+// +stateify savable
type DentryMetadataFileDescriptionImpl struct{}
// Stat implements FileDescriptionImpl.Stat.
@@ -210,12 +216,16 @@ func (DentryMetadataFileDescriptionImpl) SetStat(ctx context.Context, opts SetSt
// DynamicBytesSource represents a data source for a
// DynamicBytesFileDescriptionImpl.
+//
+// +stateify savable
type DynamicBytesSource interface {
// Generate writes the file's contents to buf.
Generate(ctx context.Context, buf *bytes.Buffer) error
}
// StaticData implements DynamicBytesSource over a static string.
+//
+// +stateify savable
type StaticData struct {
Data string
}
@@ -242,14 +252,24 @@ type WritableDynamicBytesSource interface {
//
// DynamicBytesFileDescriptionImpl.SetDataSource() must be called before first
// use.
+//
+// +stateify savable
type DynamicBytesFileDescriptionImpl struct {
data DynamicBytesSource // immutable
- mu sync.Mutex // protects the following fields
- buf bytes.Buffer
+ mu sync.Mutex `state:"nosave"` // protects the following fields
+ buf bytes.Buffer `state:".([]byte)"`
off int64
lastRead int64 // offset at which the last Read, PRead, or Seek ended
}
+func (fd *DynamicBytesFileDescriptionImpl) saveBuf() []byte {
+ return fd.buf.Bytes()
+}
+
+func (fd *DynamicBytesFileDescriptionImpl) loadBuf(p []byte) {
+ fd.buf.Write(p)
+}
+
// SetDataSource must be called exactly once on fd before first use.
func (fd *DynamicBytesFileDescriptionImpl) SetDataSource(data DynamicBytesSource) {
fd.data = data
@@ -382,6 +402,8 @@ func GenericConfigureMMap(fd *FileDescription, m memmap.Mappable, opts *memmap.M
// LockFD may be used by most implementations of FileDescriptionImpl.Lock*
// functions. Caller must call Init().
+//
+// +stateify savable
type LockFD struct {
locks *FileLocks
}
@@ -409,6 +431,8 @@ func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
// NoLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface
// returning ENOLCK.
+//
+// +stateify savable
type NoLockFD struct{}
// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go
index 7dae4e7e8..c93d94634 100644
--- a/pkg/sentry/vfs/filesystem.go
+++ b/pkg/sentry/vfs/filesystem.go
@@ -506,6 +506,8 @@ type FilesystemImpl interface {
// PrependPathAtVFSRootError is returned by implementations of
// FilesystemImpl.PrependPath() when they encounter the contextual VFS root.
+//
+// +stateify savable
type PrependPathAtVFSRootError struct{}
// Error implements error.Error.
@@ -516,6 +518,8 @@ func (PrependPathAtVFSRootError) Error() string {
// PrependPathAtNonMountRootError is returned by implementations of
// FilesystemImpl.PrependPath() when they encounter an independent ancestor
// Dentry that is not the Mount root.
+//
+// +stateify savable
type PrependPathAtNonMountRootError struct{}
// Error implements error.Error.
@@ -526,6 +530,8 @@ func (PrependPathAtNonMountRootError) Error() string {
// PrependPathSyntheticError is returned by implementations of
// FilesystemImpl.PrependPath() for which prepended names do not represent real
// paths.
+//
+// +stateify savable
type PrependPathSyntheticError struct{}
// Error implements error.Error.
diff --git a/pkg/sentry/vfs/filesystem_type.go b/pkg/sentry/vfs/filesystem_type.go
index f2298f7f6..bc19db1d5 100644
--- a/pkg/sentry/vfs/filesystem_type.go
+++ b/pkg/sentry/vfs/filesystem_type.go
@@ -55,10 +55,13 @@ type registeredFilesystemType struct {
// RegisterFilesystemTypeOptions contains options to
// VirtualFilesystem.RegisterFilesystem().
+//
+// +stateify savable
type RegisterFilesystemTypeOptions struct {
- // If AllowUserMount is true, allow calls to VirtualFilesystem.MountAt()
- // for which MountOptions.InternalMount == false to use this filesystem
- // type.
+ // AllowUserMount determines whether users are allowed to mount a file system
+ // of this type, i.e. through mount(2). If AllowUserMount is true, allow calls
+ // to VirtualFilesystem.MountAt() for which MountOptions.InternalMount == false
+ // to use this filesystem type.
AllowUserMount bool
// If AllowUserList is true, make this filesystem type visible in
diff --git a/pkg/sentry/vfs/genericfstree/genericfstree.go b/pkg/sentry/vfs/genericfstree/genericfstree.go
index 8882fa84a..2d27d9d35 100644
--- a/pkg/sentry/vfs/genericfstree/genericfstree.go
+++ b/pkg/sentry/vfs/genericfstree/genericfstree.go
@@ -27,6 +27,8 @@ import (
)
// Dentry is a required type parameter that is a struct with the given fields.
+//
+// +stateify savable
type Dentry struct {
// vfsd is the embedded vfs.Dentry corresponding to this vfs.DentryImpl.
vfsd vfs.Dentry
diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go
index aff220a61..3f0b8f45b 100644
--- a/pkg/sentry/vfs/inotify.go
+++ b/pkg/sentry/vfs/inotify.go
@@ -37,6 +37,8 @@ const inotifyEventBaseSize = 16
//
// The way events are labelled appears somewhat arbitrary, but they must match
// Linux so that IN_EXCL_UNLINK behaves as it does in Linux.
+//
+// +stateify savable
type EventType uint8
// PathEvent and InodeEvent correspond to FSNOTIFY_EVENT_PATH and
diff --git a/pkg/sentry/vfs/lock.go b/pkg/sentry/vfs/lock.go
index 42666eebf..55783d4eb 100644
--- a/pkg/sentry/vfs/lock.go
+++ b/pkg/sentry/vfs/lock.go
@@ -33,6 +33,8 @@ import (
// Note that in Linux these two types of locks are _not_ cooperative, because
// race and deadlock conditions make merging them prohibitive. We do the same
// and keep them oblivious to each other.
+//
+// +stateify savable
type FileLocks struct {
// bsd is a set of BSD-style advisory file wide locks, see flock(2).
bsd fslock.Locks
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index 9da09d4c1..dfc3ae6c0 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -65,7 +65,7 @@ type Mount struct {
//
// Invariant: key.parent != nil iff key.point != nil. key.point belongs to
// key.parent.fs.
- key mountKey
+ key mountKey `state:".(VirtualDentry)"`
// ns is the namespace in which this Mount was mounted. ns is protected by
// VirtualFilesystem.mountMu.
@@ -345,6 +345,7 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti
return nil
}
+// +stateify savable
type umountRecursiveOptions struct {
// If eager is true, ensure that future calls to Mount.tryIncMountedRef()
// on umounted mounts fail.
@@ -414,7 +415,7 @@ func (vfs *VirtualFilesystem) connectLocked(mnt *Mount, vd VirtualDentry, mntns
}
}
mnt.IncRef() // dropped by callers of umountRecursiveLocked
- mnt.storeKey(vd)
+ mnt.setKey(vd)
if vd.mount.children == nil {
vd.mount.children = make(map[*Mount]struct{})
}
@@ -439,13 +440,13 @@ func (vfs *VirtualFilesystem) connectLocked(mnt *Mount, vd VirtualDentry, mntns
// * vfs.mounts.seq must be in a writer critical section.
// * mnt.parent() != nil.
func (vfs *VirtualFilesystem) disconnectLocked(mnt *Mount) VirtualDentry {
- vd := mnt.loadKey()
+ vd := mnt.getKey()
if checkInvariants {
if vd.mount != nil {
panic("VFS.disconnectLocked called on disconnected mount")
}
}
- mnt.storeKey(VirtualDentry{})
+ mnt.loadKey(VirtualDentry{})
delete(vd.mount.children, mnt)
atomic.AddUint32(&vd.dentry.mounts, math.MaxUint32) // -1
mnt.ns.mountpoints[vd.dentry]--
diff --git a/pkg/sentry/vfs/mount_test.go b/pkg/sentry/vfs/mount_test.go
index 3335e4057..cb8c56bd3 100644
--- a/pkg/sentry/vfs/mount_test.go
+++ b/pkg/sentry/vfs/mount_test.go
@@ -38,7 +38,7 @@ func TestMountTableInsertLookup(t *testing.T) {
mt.Init()
mount := &Mount{}
- mount.storeKey(VirtualDentry{&Mount{}, &Dentry{}})
+ mount.setKey(VirtualDentry{&Mount{}, &Dentry{}})
mt.Insert(mount)
if m := mt.Lookup(mount.parent(), mount.point()); m != mount {
@@ -79,7 +79,7 @@ const enableComparativeBenchmarks = false
func newBenchMount() *Mount {
mount := &Mount{}
- mount.storeKey(VirtualDentry{&Mount{}, &Dentry{}})
+ mount.loadKey(VirtualDentry{&Mount{}, &Dentry{}})
return mount
}
@@ -94,7 +94,7 @@ func BenchmarkMountTableParallelLookup(b *testing.B) {
for i := 0; i < numMounts; i++ {
mount := newBenchMount()
mt.Insert(mount)
- keys = append(keys, mount.loadKey())
+ keys = append(keys, mount.saveKey())
}
var ready sync.WaitGroup
@@ -146,7 +146,7 @@ func BenchmarkMountMapParallelLookup(b *testing.B) {
keys := make([]VirtualDentry, 0, numMounts)
for i := 0; i < numMounts; i++ {
mount := newBenchMount()
- key := mount.loadKey()
+ key := mount.saveKey()
ms[key] = mount
keys = append(keys, key)
}
@@ -201,7 +201,7 @@ func BenchmarkMountSyncMapParallelLookup(b *testing.B) {
keys := make([]VirtualDentry, 0, numMounts)
for i := 0; i < numMounts; i++ {
mount := newBenchMount()
- key := mount.loadKey()
+ key := mount.getKey()
ms.Store(key, mount)
keys = append(keys, key)
}
@@ -283,7 +283,7 @@ func BenchmarkMountMapNegativeLookup(b *testing.B) {
ms := make(map[VirtualDentry]*Mount)
for i := 0; i < numMounts; i++ {
mount := newBenchMount()
- ms[mount.loadKey()] = mount
+ ms[mount.getKey()] = mount
}
negkeys := make([]VirtualDentry, 0, numMounts)
for i := 0; i < numMounts; i++ {
@@ -318,7 +318,7 @@ func BenchmarkMountSyncMapNegativeLookup(b *testing.B) {
var ms sync.Map
for i := 0; i < numMounts; i++ {
mount := newBenchMount()
- ms.Store(mount.loadKey(), mount)
+ ms.Store(mount.saveKey(), mount)
}
negkeys := make([]VirtualDentry, 0, numMounts)
for i := 0; i < numMounts; i++ {
@@ -372,7 +372,7 @@ func BenchmarkMountMapInsert(b *testing.B) {
b.ResetTimer()
for i := range mounts {
mount := mounts[i]
- ms[mount.loadKey()] = mount
+ ms[mount.saveKey()] = mount
}
}
@@ -392,7 +392,7 @@ func BenchmarkMountSyncMapInsert(b *testing.B) {
b.ResetTimer()
for i := range mounts {
mount := mounts[i]
- ms.Store(mount.loadKey(), mount)
+ ms.Store(mount.saveKey(), mount)
}
}
@@ -425,13 +425,13 @@ func BenchmarkMountMapRemove(b *testing.B) {
ms := make(map[VirtualDentry]*Mount)
for i := range mounts {
mount := mounts[i]
- ms[mount.loadKey()] = mount
+ ms[mount.saveKey()] = mount
}
b.ResetTimer()
for i := range mounts {
mount := mounts[i]
- delete(ms, mount.loadKey())
+ delete(ms, mount.saveKey())
}
}
@@ -447,12 +447,12 @@ func BenchmarkMountSyncMapRemove(b *testing.B) {
var ms sync.Map
for i := range mounts {
mount := mounts[i]
- ms.Store(mount.loadKey(), mount)
+ ms.Store(mount.saveKey(), mount)
}
b.ResetTimer()
for i := range mounts {
mount := mounts[i]
- ms.Delete(mount.loadKey())
+ ms.Delete(mount.saveKey())
}
}
diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go
index da2a2e9c4..b7d122d22 100644
--- a/pkg/sentry/vfs/mount_unsafe.go
+++ b/pkg/sentry/vfs/mount_unsafe.go
@@ -34,6 +34,8 @@ import (
// structurally identical to VirtualDentry, but stores its fields as
// unsafe.Pointer since mutators synchronize with VFS path traversal using
// seqcounts.
+//
+// This is explicitly not savable.
type mountKey struct {
parent unsafe.Pointer // *Mount
point unsafe.Pointer // *Dentry
@@ -47,19 +49,23 @@ func (mnt *Mount) point() *Dentry {
return (*Dentry)(atomic.LoadPointer(&mnt.key.point))
}
-func (mnt *Mount) loadKey() VirtualDentry {
+func (mnt *Mount) getKey() VirtualDentry {
return VirtualDentry{
mount: mnt.parent(),
dentry: mnt.point(),
}
}
+func (mnt *Mount) saveKey() VirtualDentry { return mnt.getKey() }
+
// Invariant: mnt.key.parent == nil. vd.Ok().
-func (mnt *Mount) storeKey(vd VirtualDentry) {
+func (mnt *Mount) setKey(vd VirtualDentry) {
atomic.StorePointer(&mnt.key.parent, unsafe.Pointer(vd.mount))
atomic.StorePointer(&mnt.key.point, unsafe.Pointer(vd.dentry))
}
+func (mnt *Mount) loadKey(vd VirtualDentry) { mnt.setKey(vd) }
+
// mountTable maps (mount parent, mount point) pairs to mounts. It supports
// efficient concurrent lookup, even in the presence of concurrent mutators
// (provided mutation is sufficiently uncommon).
@@ -92,6 +98,7 @@ type mountTable struct {
// length and cap in separate uint32s) for ~free.
size uint64
+ // FIXME(gvisor.dev/issue/1663): Slots need to be saved.
slots unsafe.Pointer `state:"nosave"` // []mountSlot; never nil after Init
}
diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go
index b33d36cb1..bc79e5ecc 100644
--- a/pkg/sentry/vfs/options.go
+++ b/pkg/sentry/vfs/options.go
@@ -21,6 +21,8 @@ import (
// GetDentryOptions contains options to VirtualFilesystem.GetDentryAt() and
// FilesystemImpl.GetDentryAt().
+//
+// +stateify savable
type GetDentryOptions struct {
// If CheckSearchable is true, FilesystemImpl.GetDentryAt() must check that
// the returned Dentry is a directory for which creds has search
@@ -30,6 +32,8 @@ type GetDentryOptions struct {
// MkdirOptions contains options to VirtualFilesystem.MkdirAt() and
// FilesystemImpl.MkdirAt().
+//
+// +stateify savable
type MkdirOptions struct {
// Mode is the file mode bits for the created directory.
Mode linux.FileMode
@@ -56,6 +60,8 @@ type MkdirOptions struct {
// MknodOptions contains options to VirtualFilesystem.MknodAt() and
// FilesystemImpl.MknodAt().
+//
+// +stateify savable
type MknodOptions struct {
// Mode is the file type and mode bits for the created file.
Mode linux.FileMode
@@ -72,6 +78,8 @@ type MknodOptions struct {
// MountFlags contains flags as specified for mount(2), e.g. MS_NOEXEC.
// MS_RDONLY is not part of MountFlags because it's tracked in Mount.writers.
+//
+// +stateify savable
type MountFlags struct {
// NoExec is equivalent to MS_NOEXEC.
NoExec bool
@@ -93,6 +101,8 @@ type MountFlags struct {
}
// MountOptions contains options to VirtualFilesystem.MountAt().
+//
+// +stateify savable
type MountOptions struct {
// Flags contains flags as specified for mount(2), e.g. MS_NOEXEC.
Flags MountFlags
@@ -103,13 +113,17 @@ type MountOptions struct {
// GetFilesystemOptions contains options to FilesystemType.GetFilesystem().
GetFilesystemOptions GetFilesystemOptions
- // If InternalMount is true, allow the use of filesystem types for which
- // RegisterFilesystemTypeOptions.AllowUserMount == false.
+ // InternalMount indicates whether the mount operation is coming from the
+ // application, i.e. through mount(2). If InternalMount is true, allow the use
+ // of filesystem types for which RegisterFilesystemTypeOptions.AllowUserMount
+ // == false.
InternalMount bool
}
// OpenOptions contains options to VirtualFilesystem.OpenAt() and
// FilesystemImpl.OpenAt().
+//
+// +stateify savable
type OpenOptions struct {
// Flags contains access mode and flags as specified for open(2).
//
@@ -135,6 +149,8 @@ type OpenOptions struct {
// ReadOptions contains options to FileDescription.PRead(),
// FileDescriptionImpl.PRead(), FileDescription.Read(), and
// FileDescriptionImpl.Read().
+//
+// +stateify savable
type ReadOptions struct {
// Flags contains flags as specified for preadv2(2).
Flags uint32
@@ -142,6 +158,8 @@ type ReadOptions struct {
// RenameOptions contains options to VirtualFilesystem.RenameAt() and
// FilesystemImpl.RenameAt().
+//
+// +stateify savable
type RenameOptions struct {
// Flags contains flags as specified for renameat2(2).
Flags uint32
@@ -153,6 +171,8 @@ type RenameOptions struct {
// SetStatOptions contains options to VirtualFilesystem.SetStatAt(),
// FilesystemImpl.SetStatAt(), FileDescription.SetStat(), and
// FileDescriptionImpl.SetStat().
+//
+// +stateify savable
type SetStatOptions struct {
// Stat is the metadata that should be set. Only fields indicated by
// Stat.Mask should be set.
@@ -174,6 +194,8 @@ type SetStatOptions struct {
// BoundEndpointOptions contains options to VirtualFilesystem.BoundEndpointAt()
// and FilesystemImpl.BoundEndpointAt().
+//
+// +stateify savable
type BoundEndpointOptions struct {
// Addr is the path of the file whose socket endpoint is being retrieved.
// It is generally irrelevant: most endpoints are stored at a dentry that
@@ -193,6 +215,8 @@ type BoundEndpointOptions struct {
// GetXattrOptions contains options to VirtualFilesystem.GetXattrAt(),
// FilesystemImpl.GetXattrAt(), FileDescription.GetXattr(), and
// FileDescriptionImpl.GetXattr().
+//
+// +stateify savable
type GetXattrOptions struct {
// Name is the name of the extended attribute to retrieve.
Name string
@@ -207,6 +231,8 @@ type GetXattrOptions struct {
// SetXattrOptions contains options to VirtualFilesystem.SetXattrAt(),
// FilesystemImpl.SetXattrAt(), FileDescription.SetXattr(), and
// FileDescriptionImpl.SetXattr().
+//
+// +stateify savable
type SetXattrOptions struct {
// Name is the name of the extended attribute being mutated.
Name string
@@ -221,6 +247,8 @@ type SetXattrOptions struct {
// StatOptions contains options to VirtualFilesystem.StatAt(),
// FilesystemImpl.StatAt(), FileDescription.Stat(), and
// FileDescriptionImpl.Stat().
+//
+// +stateify savable
type StatOptions struct {
// Mask is the set of fields in the returned Statx that the FilesystemImpl
// or FileDescriptionImpl should provide. Bits are as in linux.Statx.Mask.
@@ -238,6 +266,8 @@ type StatOptions struct {
}
// UmountOptions contains options to VirtualFilesystem.UmountAt().
+//
+// +stateify savable
type UmountOptions struct {
// Flags contains flags as specified for umount2(2).
Flags uint32
@@ -246,6 +276,8 @@ type UmountOptions struct {
// WriteOptions contains options to FileDescription.PWrite(),
// FileDescriptionImpl.PWrite(), FileDescription.Write(), and
// FileDescriptionImpl.Write().
+//
+// +stateify savable
type WriteOptions struct {
// Flags contains flags as specified for pwritev2(2).
Flags uint32
diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go
index 00eeb8842..d48520d58 100644
--- a/pkg/sentry/vfs/permissions.go
+++ b/pkg/sentry/vfs/permissions.go
@@ -26,6 +26,8 @@ import (
)
// AccessTypes is a bitmask of Unix file permissions.
+//
+// +stateify savable
type AccessTypes uint16
// Bits in AccessTypes.
diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go
index 3304372d9..e4fd55012 100644
--- a/pkg/sentry/vfs/resolving_path.go
+++ b/pkg/sentry/vfs/resolving_path.go
@@ -35,6 +35,8 @@ import (
// FilesystemImpl methods.
//
// ResolvingPath is loosely analogous to Linux's struct nameidata.
+//
+// +stateify savable
type ResolvingPath struct {
vfs *VirtualFilesystem
root VirtualDentry // refs borrowed from PathOperation
@@ -88,6 +90,7 @@ func init() {
// so error "constants" are really mutable vars, necessitating somewhat
// expensive interface object comparisons.
+// +stateify savable
type resolveMountRootOrJumpError struct{}
// Error implements error.Error.
@@ -95,6 +98,7 @@ func (resolveMountRootOrJumpError) Error() string {
return "resolving mount root or jump"
}
+// +stateify savable
type resolveMountPointError struct{}
// Error implements error.Error.
@@ -102,6 +106,7 @@ func (resolveMountPointError) Error() string {
return "resolving mount point"
}
+// +stateify savable
type resolveAbsSymlinkError struct{}
// Error implements error.Error.
diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index 1ebf355ef..5bd756ea5 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -163,6 +163,8 @@ func (vfs *VirtualFilesystem) Init(ctx context.Context) error {
// PathOperation is passed to VFS methods by pointer to reduce memory copying:
// it's somewhat large and should never escape. (Options structs are passed by
// pointer to VFS and FileDescription methods for the same reason.)
+//
+// +stateify savable
type PathOperation struct {
// Root is the VFS root. References on Root are borrowed from the provider
// of the PathOperation.
diff --git a/pkg/state/types.go b/pkg/state/types.go
index 215ef80f8..84aed8732 100644
--- a/pkg/state/types.go
+++ b/pkg/state/types.go
@@ -107,6 +107,14 @@ func lookupNameFields(typ reflect.Type) (string, []string, bool) {
}
return name, nil, true
}
+ // Sanity check the type.
+ if raceEnabled {
+ if _, ok := reverseTypeDatabase[typ]; !ok {
+ // The type was not registered? Must be an embedded
+ // structure or something else.
+ return "", nil, false
+ }
+ }
// Extract the name from the object.
name := t.StateTypeName()
fields := t.StateFields()
@@ -313,6 +321,9 @@ var primitiveTypeDatabase = func() map[string]reflect.Type {
// globalTypeDatabase is used for dispatching interfaces on decode.
var globalTypeDatabase = map[string]reflect.Type{}
+// reverseTypeDatabase is a reverse mapping.
+var reverseTypeDatabase = map[reflect.Type]string{}
+
// Register registers a type.
//
// This must be called on init and only done once.
@@ -358,4 +369,7 @@ func Register(t Type) {
Failf("conflicting name for %T: matches interfaceType", t)
}
globalTypeDatabase[name] = typ
+ if raceEnabled {
+ reverseTypeDatabase[typ] = name
+ }
}
diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go
index c975ad9cf..12b061def 100644
--- a/pkg/tcpip/adapters/gonet/gonet_test.go
+++ b/pkg/tcpip/adapters/gonet/gonet_test.go
@@ -61,8 +61,8 @@ func TestTimeouts(t *testing.T) {
func newLoopbackStack() (*stack.Stack, *tcpip.Error) {
// Create the stack and add a NIC.
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol, udp.NewProtocol},
})
if err := s.CreateNIC(NICID, loopback.New()); err != nil {
diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go
index b769094dc..19627fa9b 100644
--- a/pkg/tcpip/checker/checker.go
+++ b/pkg/tcpip/checker/checker.go
@@ -339,7 +339,7 @@ func NoChecksum(noChecksum bool) TransportChecker {
udp, ok := h.(header.UDP)
if !ok {
- return
+ t.Fatalf("UDP header not found in h: %T", h)
}
if b := udp.Checksum() == 0; b != noChecksum {
@@ -348,14 +348,14 @@ func NoChecksum(noChecksum bool) TransportChecker {
}
}
-// SeqNum creates a checker that checks the sequence number.
-func SeqNum(seq uint32) TransportChecker {
+// TCPSeqNum creates a checker that checks the sequence number.
+func TCPSeqNum(seq uint32) TransportChecker {
return func(t *testing.T, h header.Transport) {
t.Helper()
tcp, ok := h.(header.TCP)
if !ok {
- return
+ t.Fatalf("TCP header not found in h: %T", h)
}
if s := tcp.SequenceNumber(); s != seq {
@@ -364,14 +364,14 @@ func SeqNum(seq uint32) TransportChecker {
}
}
-// AckNum creates a checker that checks the ack number.
-func AckNum(seq uint32) TransportChecker {
+// TCPAckNum creates a checker that checks the ack number.
+func TCPAckNum(seq uint32) TransportChecker {
return func(t *testing.T, h header.Transport) {
t.Helper()
tcp, ok := h.(header.TCP)
if !ok {
- return
+ t.Fatalf("TCP header not found in h: %T", h)
}
if s := tcp.AckNumber(); s != seq {
@@ -380,18 +380,52 @@ func AckNum(seq uint32) TransportChecker {
}
}
-// Window creates a checker that checks the tcp window.
-func Window(window uint16) TransportChecker {
+// TCPWindow creates a checker that checks the tcp window.
+func TCPWindow(window uint16) TransportChecker {
return func(t *testing.T, h header.Transport) {
t.Helper()
tcp, ok := h.(header.TCP)
if !ok {
- return
+ t.Fatalf("TCP header not found in hdr : %T", h)
}
if w := tcp.WindowSize(); w != window {
- t.Errorf("Bad window, got 0x%x, want 0x%x", w, window)
+ t.Errorf("Bad window, got %d, want %d", w, window)
+ }
+ }
+}
+
+// TCPWindowGreaterThanEq creates a checker that checks that the TCP window
+// is greater than or equal to the provided value.
+func TCPWindowGreaterThanEq(window uint16) TransportChecker {
+ return func(t *testing.T, h header.Transport) {
+ t.Helper()
+
+ tcp, ok := h.(header.TCP)
+ if !ok {
+ t.Fatalf("TCP header not found in h: %T", h)
+ }
+
+ if w := tcp.WindowSize(); w < window {
+ t.Errorf("Bad window, got %d, want > %d", w, window)
+ }
+ }
+}
+
+// TCPWindowLessThanEq creates a checker that checks that the tcp window
+// is less than or equal to the provided value.
+func TCPWindowLessThanEq(window uint16) TransportChecker {
+ return func(t *testing.T, h header.Transport) {
+ t.Helper()
+
+ tcp, ok := h.(header.TCP)
+ if !ok {
+ t.Fatalf("TCP header not found in h: %T", h)
+ }
+
+ if w := tcp.WindowSize(); w > window {
+ t.Errorf("Bad window, got %d, want < %d", w, window)
}
}
}
@@ -403,7 +437,7 @@ func TCPFlags(flags uint8) TransportChecker {
tcp, ok := h.(header.TCP)
if !ok {
- return
+ t.Fatalf("TCP header not found in h: %T", h)
}
if f := tcp.Flags(); f != flags {
@@ -420,7 +454,7 @@ func TCPFlagsMatch(flags, mask uint8) TransportChecker {
tcp, ok := h.(header.TCP)
if !ok {
- return
+ t.Fatalf("TCP header not found in h: %T", h)
}
if f := tcp.Flags(); (f & mask) != (flags & mask) {
diff --git a/pkg/tcpip/faketime/faketime.go b/pkg/tcpip/faketime/faketime.go
index 1193f1d7d..f7a4fbde1 100644
--- a/pkg/tcpip/faketime/faketime.go
+++ b/pkg/tcpip/faketime/faketime.go
@@ -24,6 +24,26 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
)
+// NullClock implements a clock that never advances.
+type NullClock struct{}
+
+var _ tcpip.Clock = (*NullClock)(nil)
+
+// NowNanoseconds implements tcpip.Clock.NowNanoseconds.
+func (*NullClock) NowNanoseconds() int64 {
+ return 0
+}
+
+// NowMonotonic implements tcpip.Clock.NowMonotonic.
+func (*NullClock) NowMonotonic() int64 {
+ return 0
+}
+
+// AfterFunc implements tcpip.Clock.AfterFunc.
+func (*NullClock) AfterFunc(time.Duration, func()) tcpip.Timer {
+ return nil
+}
+
// ManualClock implements tcpip.Clock and only advances manually with Advance
// method.
type ManualClock struct {
diff --git a/pkg/tcpip/network/BUILD b/pkg/tcpip/network/BUILD
index 46083925c..376583f3c 100644
--- a/pkg/tcpip/network/BUILD
+++ b/pkg/tcpip/network/BUILD
@@ -9,6 +9,7 @@ go_test(
"ip_test.go",
],
deps = [
+ "//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/header",
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index 81e286e80..b47a7be51 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -15,17 +15,11 @@
// Package arp implements the ARP network protocol. It is used to resolve
// IPv4 addresses into link-local MAC addresses, and advertises IPv4
// addresses of its stack with the local network.
-//
-// To use it in the networking stack, pass arp.NewProtocol() as one of the
-// network protocols when calling stack.New. Then add an "arp" address to every
-// NIC on the stack that should respond to ARP requests. That is:
-//
-// if err := s.AddAddress(1, arp.ProtocolNumber, "arp"); err != nil {
-// // handle err
-// }
package arp
import (
+ "sync/atomic"
+
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -41,15 +35,57 @@ const (
ProtocolAddress = tcpip.Address("arp")
)
-// endpoint implements stack.NetworkEndpoint.
+var _ stack.AddressableEndpoint = (*endpoint)(nil)
+var _ stack.NetworkEndpoint = (*endpoint)(nil)
+
type endpoint struct {
- protocol *protocol
- nicID tcpip.NICID
+ stack.AddressableEndpointState
+
+ protocol *protocol
+
+ // enabled is set to 1 when the NIC is enabled and 0 when it is disabled.
+ //
+ // Must be accessed using atomic operations.
+ enabled uint32
+
+ nic stack.NetworkInterface
linkEP stack.LinkEndpoint
linkAddrCache stack.LinkAddressCache
nud stack.NUDHandler
}
+func (e *endpoint) Enable() *tcpip.Error {
+ if !e.nic.Enabled() {
+ return tcpip.ErrNotPermitted
+ }
+
+ e.setEnabled(true)
+ return nil
+}
+
+func (e *endpoint) Enabled() bool {
+ return e.nic.Enabled() && e.isEnabled()
+}
+
+// isEnabled returns true if the endpoint is enabled, regardless of the
+// enabled status of the NIC.
+func (e *endpoint) isEnabled() bool {
+ return atomic.LoadUint32(&e.enabled) == 1
+}
+
+// setEnabled sets the enabled status for the endpoint.
+func (e *endpoint) setEnabled(v bool) {
+ if v {
+ atomic.StoreUint32(&e.enabled, 1)
+ } else {
+ atomic.StoreUint32(&e.enabled, 0)
+ }
+}
+
+func (e *endpoint) Disable() {
+ e.setEnabled(false)
+}
+
// DefaultTTL is unused for ARP. It implements stack.NetworkEndpoint.
func (e *endpoint) DefaultTTL() uint8 {
return 0
@@ -60,19 +96,13 @@ func (e *endpoint) MTU() uint32 {
return lmtu - uint32(e.MaxHeaderLength())
}
-func (e *endpoint) NICID() tcpip.NICID {
- return e.nicID
-}
-
-func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities {
- return e.linkEP.Capabilities()
-}
-
func (e *endpoint) MaxHeaderLength() uint16 {
return e.linkEP.MaxHeaderLength() + header.ARPSize
}
-func (e *endpoint) Close() {}
+func (e *endpoint) Close() {
+ e.AddressableEndpointState.Cleanup()
+}
func (e *endpoint) WritePacket(*stack.Route, *stack.GSO, stack.NetworkHeaderParams, *stack.PacketBuffer) *tcpip.Error {
return tcpip.ErrNotSupported
@@ -93,6 +123,10 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
}
func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
+ if !e.isEnabled() {
+ return
+ }
+
h := header.ARP(pkt.NetworkHeader().View())
if !h.IsValid() {
return
@@ -103,15 +137,15 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
localAddr := tcpip.Address(h.ProtocolAddressTarget())
if e.nud == nil {
- if e.linkAddrCache.CheckLocalAddress(e.nicID, header.IPv4ProtocolNumber, localAddr) == 0 {
+ if e.linkAddrCache.CheckLocalAddress(e.nic.ID(), header.IPv4ProtocolNumber, localAddr) == 0 {
return // we have no useful answer, ignore the request
}
addr := tcpip.Address(h.ProtocolAddressSender())
linkAddr := tcpip.LinkAddress(h.HardwareAddressSender())
- e.linkAddrCache.AddLinkAddress(e.nicID, addr, linkAddr)
+ e.linkAddrCache.AddLinkAddress(e.nic.ID(), addr, linkAddr)
} else {
- if r.Stack().CheckLocalAddress(e.nicID, header.IPv4ProtocolNumber, localAddr) == 0 {
+ if r.Stack().CheckLocalAddress(e.nic.ID(), header.IPv4ProtocolNumber, localAddr) == 0 {
return // we have no useful answer, ignore the request
}
@@ -137,7 +171,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
linkAddr := tcpip.LinkAddress(h.HardwareAddressSender())
if e.nud == nil {
- e.linkAddrCache.AddLinkAddress(e.nicID, addr, linkAddr)
+ e.linkAddrCache.AddLinkAddress(e.nic.ID(), addr, linkAddr)
return
}
@@ -169,14 +203,16 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
return tcpip.Address(h.ProtocolAddressSender()), ProtocolAddress
}
-func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher, sender stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
- return &endpoint{
+func (p *protocol) NewEndpoint(nic stack.NetworkInterface, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
+ e := &endpoint{
protocol: p,
- nicID: nicID,
- linkEP: sender,
+ nic: nic,
+ linkEP: nic.LinkEndpoint(),
linkAddrCache: linkAddrCache,
nud: nud,
}
+ e.AddressableEndpointState.Init(e)
+ return e
}
// LinkAddressProtocol implements stack.LinkAddressResolver.LinkAddressProtocol.
@@ -238,13 +274,11 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNu
return 0, false, parse.ARP(pkt)
}
-// ReturnError implements stack.TransportProtocol.ReturnError.
-func (*protocol) ReturnError(*stack.Route, tcpip.ICMPReason, *stack.PacketBuffer) *tcpip.Error {
- // In ARP, there is no such response so do nothing.
- return nil
-}
-
// NewProtocol returns an ARP network protocol.
-func NewProtocol() stack.NetworkProtocol {
+//
+// Note, to make sure that the ARP endpoint receives ARP packets, the "arp"
+// address must be added to every NIC that should respond to ARP requests. See
+// ProtocolAddress for more details.
+func NewProtocol(*stack.Stack) stack.NetworkProtocol {
return &protocol{}
}
diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go
index 9c9a859e3..626af975a 100644
--- a/pkg/tcpip/network/arp/arp_test.go
+++ b/pkg/tcpip/network/arp/arp_test.go
@@ -176,8 +176,8 @@ func newTestContext(t *testing.T, useNeighborCache bool) *testContext {
}
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), arp.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol4()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, arp.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol4},
NUDConfigs: c,
NUDDisp: &d,
UseNeighborCache: useNeighborCache,
@@ -442,7 +442,7 @@ func TestLinkAddressRequest(t *testing.T) {
}
for _, test := range tests {
- p := arp.NewProtocol()
+ p := arp.NewProtocol(nil)
linkRes, ok := p.(stack.LinkAddressResolver)
if !ok {
t.Fatal("expected ARP protocol to implement stack.LinkAddressResolver")
diff --git a/pkg/tcpip/network/fragmentation/BUILD b/pkg/tcpip/network/fragmentation/BUILD
index 96c5f42f8..e247f06a4 100644
--- a/pkg/tcpip/network/fragmentation/BUILD
+++ b/pkg/tcpip/network/fragmentation/BUILD
@@ -43,5 +43,6 @@ go_test(
library = ":fragmentation",
deps = [
"//pkg/tcpip/buffer",
+ "//pkg/tcpip/faketime",
],
)
diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go
index 6a4843f92..e1909fab0 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation.go
@@ -81,6 +81,8 @@ type Fragmentation struct {
size int
timeout time.Duration
blockSize uint16
+ clock tcpip.Clock
+ releaseJob *tcpip.Job
}
// NewFragmentation creates a new Fragmentation.
@@ -97,7 +99,7 @@ type Fragmentation struct {
// reassemblingTimeout specifies the maximum time allowed to reassemble a packet.
// Fragments are lazily evicted only when a new a packet with an
// already existing fragmentation-id arrives after the timeout.
-func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration) *Fragmentation {
+func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration, clock tcpip.Clock) *Fragmentation {
if lowMemoryLimit >= highMemoryLimit {
lowMemoryLimit = highMemoryLimit
}
@@ -110,13 +112,17 @@ func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, rea
blockSize = minBlockSize
}
- return &Fragmentation{
+ f := &Fragmentation{
reassemblers: make(map[FragmentID]*reassembler),
highLimit: highMemoryLimit,
lowLimit: lowMemoryLimit,
timeout: reassemblingTimeout,
blockSize: blockSize,
+ clock: clock,
}
+ f.releaseJob = tcpip.NewJob(f.clock, &f.mu, f.releaseReassemblersLocked)
+
+ return f
}
// Process processes an incoming fragment belonging to an ID and returns a
@@ -155,15 +161,17 @@ func (f *Fragmentation) Process(
f.mu.Lock()
r, ok := f.reassemblers[id]
- if ok && r.tooOld(f.timeout) {
- // This is very likely to be an id-collision or someone performing a slow-rate attack.
- f.release(r)
- ok = false
- }
if !ok {
- r = newReassembler(id)
+ r = newReassembler(id, f.clock)
f.reassemblers[id] = r
+ wasEmpty := f.rList.Empty()
f.rList.PushFront(r)
+ if wasEmpty {
+ // If we have just pushed a first reassembler into an empty list, we
+ // should kickstart the release job. The release job will keep
+ // rescheduling itself until the list becomes empty.
+ f.releaseReassemblersLocked()
+ }
}
f.mu.Unlock()
@@ -211,3 +219,27 @@ func (f *Fragmentation) release(r *reassembler) {
f.size = 0
}
}
+
+// releaseReassemblersLocked releases already-expired reassemblers, then
+// schedules the job to call back itself for the remaining reassemblers if
+// any. This function must be called with f.mu locked.
+func (f *Fragmentation) releaseReassemblersLocked() {
+ now := f.clock.NowMonotonic()
+ for {
+ // The reassembler at the end of the list is the oldest.
+ r := f.rList.Back()
+ if r == nil {
+ // The list is empty.
+ break
+ }
+ elapsed := time.Duration(now-r.creationTime) * time.Nanosecond
+ if f.timeout > elapsed {
+ // If the oldest reassembler has not expired, schedule the release
+ // job so that this function is called back when it has expired.
+ f.releaseJob.Schedule(f.timeout - elapsed)
+ break
+ }
+ // If the oldest reassembler has already expired, release it.
+ f.release(r)
+ }
+}
diff --git a/pkg/tcpip/network/fragmentation/fragmentation_test.go b/pkg/tcpip/network/fragmentation/fragmentation_test.go
index 416604659..189b223c5 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation_test.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation_test.go
@@ -21,6 +21,7 @@ import (
"time"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/faketime"
)
// vv is a helper to build VectorisedView from different strings.
@@ -95,7 +96,7 @@ var processTestCases = []struct {
func TestFragmentationProcess(t *testing.T) {
for _, c := range processTestCases {
t.Run(c.comment, func(t *testing.T) {
- f := NewFragmentation(minBlockSize, 1024, 512, DefaultReassembleTimeout)
+ f := NewFragmentation(minBlockSize, 1024, 512, DefaultReassembleTimeout, &faketime.NullClock{})
firstFragmentProto := c.in[0].proto
for i, in := range c.in {
vv, proto, done, err := f.Process(in.id, in.first, in.last, in.more, in.proto, in.vv)
@@ -131,25 +132,126 @@ func TestFragmentationProcess(t *testing.T) {
}
func TestReassemblingTimeout(t *testing.T) {
- timeout := time.Millisecond
- f := NewFragmentation(minBlockSize, 1024, 512, timeout)
- // Send first fragment with id = 0, first = 0, last = 0, and more = true.
- f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"))
- // Sleep more than the timeout.
- time.Sleep(2 * timeout)
- // Send another fragment that completes a packet.
- // However, no packet should be reassembled because the fragment arrived after the timeout.
- _, _, done, err := f.Process(FragmentID{}, 1, 1, false, 0xFF, vv(1, "1"))
- if err != nil {
- t.Fatalf("f.Process(0, 1, 1, false, 0xFF, vv(1, \"1\")) failed: %v", err)
+ const (
+ reassemblyTimeout = time.Millisecond
+ protocol = 0xff
+ )
+
+ type fragment struct {
+ first uint16
+ last uint16
+ more bool
+ data string
}
- if done {
- t.Errorf("Fragmentation does not respect the reassembling timeout.")
+
+ type event struct {
+ // name is a nickname of this event.
+ name string
+
+ // clockAdvance is a duration to advance the clock. The clock advances
+ // before a fragment specified in the fragment field is processed.
+ clockAdvance time.Duration
+
+ // fragment is a fragment to process. This can be nil if there is no
+ // fragment to process.
+ fragment *fragment
+
+ // expectDone is true if the fragmentation instance should report the
+ // reassembly is done after the fragment is processd.
+ expectDone bool
+
+ // sizeAfterEvent is the expected size of the fragmentation instance after
+ // the event.
+ sizeAfterEvent int
+ }
+
+ half1 := &fragment{first: 0, last: 0, more: true, data: "0"}
+ half2 := &fragment{first: 1, last: 1, more: false, data: "1"}
+
+ tests := []struct {
+ name string
+ events []event
+ }{
+ {
+ name: "half1 and half2 are reassembled successfully",
+ events: []event{
+ {
+ name: "half1",
+ fragment: half1,
+ expectDone: false,
+ sizeAfterEvent: 1,
+ },
+ {
+ name: "half2",
+ fragment: half2,
+ expectDone: true,
+ sizeAfterEvent: 0,
+ },
+ },
+ },
+ {
+ name: "half1 timeout, half2 timeout",
+ events: []event{
+ {
+ name: "half1",
+ fragment: half1,
+ expectDone: false,
+ sizeAfterEvent: 1,
+ },
+ {
+ name: "half1 just before reassembly timeout",
+ clockAdvance: reassemblyTimeout - 1,
+ sizeAfterEvent: 1,
+ },
+ {
+ name: "half1 reassembly timeout",
+ clockAdvance: 1,
+ sizeAfterEvent: 0,
+ },
+ {
+ name: "half2",
+ fragment: half2,
+ expectDone: false,
+ sizeAfterEvent: 1,
+ },
+ {
+ name: "half2 just before reassembly timeout",
+ clockAdvance: reassemblyTimeout - 1,
+ sizeAfterEvent: 1,
+ },
+ {
+ name: "half2 reassembly timeout",
+ clockAdvance: 1,
+ sizeAfterEvent: 0,
+ },
+ },
+ },
+ }
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ clock := faketime.NewManualClock()
+ f := NewFragmentation(minBlockSize, HighFragThreshold, LowFragThreshold, reassemblyTimeout, clock)
+ for _, event := range test.events {
+ clock.Advance(event.clockAdvance)
+ if frag := event.fragment; frag != nil {
+ _, _, done, err := f.Process(FragmentID{}, frag.first, frag.last, frag.more, protocol, vv(len(frag.data), frag.data))
+ if err != nil {
+ t.Fatalf("%s: f.Process failed: %s", event.name, err)
+ }
+ if done != event.expectDone {
+ t.Fatalf("%s: got done = %t, want = %t", event.name, done, event.expectDone)
+ }
+ }
+ if got, want := f.size, event.sizeAfterEvent; got != want {
+ t.Errorf("%s: got f.size = %d, want = %d", event.name, got, want)
+ }
+ }
+ })
}
}
func TestMemoryLimits(t *testing.T) {
- f := NewFragmentation(minBlockSize, 3, 1, DefaultReassembleTimeout)
+ f := NewFragmentation(minBlockSize, 3, 1, DefaultReassembleTimeout, &faketime.NullClock{})
// Send first fragment with id = 0.
f.Process(FragmentID{ID: 0}, 0, 0, true, 0xFF, vv(1, "0"))
// Send first fragment with id = 1.
@@ -173,7 +275,7 @@ func TestMemoryLimits(t *testing.T) {
}
func TestMemoryLimitsIgnoresDuplicates(t *testing.T) {
- f := NewFragmentation(minBlockSize, 1, 0, DefaultReassembleTimeout)
+ f := NewFragmentation(minBlockSize, 1, 0, DefaultReassembleTimeout, &faketime.NullClock{})
// Send first fragment with id = 0.
f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"))
// Send the same packet again.
@@ -268,7 +370,7 @@ func TestErrors(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- f := NewFragmentation(test.blockSize, HighFragThreshold, LowFragThreshold, DefaultReassembleTimeout)
+ f := NewFragmentation(test.blockSize, HighFragThreshold, LowFragThreshold, DefaultReassembleTimeout, &faketime.NullClock{})
_, _, done, err := f.Process(FragmentID{}, test.first, test.last, test.more, 0, vv(len(test.data), test.data))
if !errors.Is(err, test.err) {
t.Errorf("got Process(_, %d, %d, %t, _, %q) = (_, _, _, %v), want = (_, _, _, %v)", test.first, test.last, test.more, test.data, err, test.err)
diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go
index f044867dc..9bb051a30 100644
--- a/pkg/tcpip/network/fragmentation/reassembler.go
+++ b/pkg/tcpip/network/fragmentation/reassembler.go
@@ -18,9 +18,9 @@ import (
"container/heap"
"fmt"
"math"
- "time"
"gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
)
@@ -40,15 +40,15 @@ type reassembler struct {
deleted int
heap fragHeap
done bool
- creationTime time.Time
+ creationTime int64
}
-func newReassembler(id FragmentID) *reassembler {
+func newReassembler(id FragmentID, clock tcpip.Clock) *reassembler {
r := &reassembler{
id: id,
holes: make([]hole, 0, 16),
heap: make(fragHeap, 0, 8),
- creationTime: time.Now(),
+ creationTime: clock.NowMonotonic(),
}
r.holes = append(r.holes, hole{
first: 0,
@@ -116,10 +116,6 @@ func (r *reassembler) process(first, last uint16, more bool, proto uint8, vv buf
return res, r.proto, true, consumed, nil
}
-func (r *reassembler) tooOld(timeout time.Duration) bool {
- return time.Now().Sub(r.creationTime) > timeout
-}
-
func (r *reassembler) checkDoneOrMark() bool {
r.mu.Lock()
prev := r.done
diff --git a/pkg/tcpip/network/fragmentation/reassembler_test.go b/pkg/tcpip/network/fragmentation/reassembler_test.go
index dff7c9dcb..a0a04a027 100644
--- a/pkg/tcpip/network/fragmentation/reassembler_test.go
+++ b/pkg/tcpip/network/fragmentation/reassembler_test.go
@@ -18,6 +18,8 @@ import (
"math"
"reflect"
"testing"
+
+ "gvisor.dev/gvisor/pkg/tcpip/faketime"
)
type updateHolesInput struct {
@@ -94,7 +96,7 @@ var holesTestCases = []struct {
func TestUpdateHoles(t *testing.T) {
for _, c := range holesTestCases {
- r := newReassembler(FragmentID{})
+ r := newReassembler(FragmentID{}, &faketime.NullClock{})
for _, i := range c.in {
r.updateHoles(i.first, i.last, i.more)
}
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index e45dd17f8..56a56362e 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -17,6 +17,7 @@ package ip_test
import (
"testing"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -98,9 +99,10 @@ func (t *testObject) checkValues(protocol tcpip.TransportProtocolNumber, vv buff
// DeliverTransportPacket is called by network endpoints after parsing incoming
// packets. This is used by the test object to verify that the results of the
// parsing are expected.
-func (t *testObject) DeliverTransportPacket(r *stack.Route, protocol tcpip.TransportProtocolNumber, pkt *stack.PacketBuffer) {
+func (t *testObject) DeliverTransportPacket(r *stack.Route, protocol tcpip.TransportProtocolNumber, pkt *stack.PacketBuffer) stack.TransportPacketDisposition {
t.checkValues(protocol, pkt.Data, r.RemoteAddress, r.LocalAddress)
t.dataCalls++
+ return stack.TransportPacketHandled
}
// DeliverTransportControlPacket is called by network endpoints after parsing
@@ -194,8 +196,8 @@ func (*testObject) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.Net
func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol(), tcp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol},
})
s.CreateNIC(nicID, loopback.New())
s.AddAddress(nicID, ipv4.ProtocolNumber, local)
@@ -210,8 +212,8 @@ func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
func buildIPv6Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol(), tcp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol},
})
s.CreateNIC(nicID, loopback.New())
s.AddAddress(nicID, ipv6.ProtocolNumber, local)
@@ -228,8 +230,8 @@ func buildDummyStack(t *testing.T) *stack.Stack {
t.Helper()
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol(), tcp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol},
})
e := channel.New(0, 1280, "")
if err := s.CreateNIC(nicID, e); err != nil {
@@ -247,10 +249,137 @@ func buildDummyStack(t *testing.T) *stack.Stack {
return s
}
+var _ stack.NetworkInterface = (*testInterface)(nil)
+
+type testInterface struct {
+ tester testObject
+
+ mu struct {
+ sync.RWMutex
+ disabled bool
+ }
+}
+
+func (*testInterface) ID() tcpip.NICID {
+ return nicID
+}
+
+func (*testInterface) IsLoopback() bool {
+ return false
+}
+
+func (*testInterface) Name() string {
+ return ""
+}
+
+func (t *testInterface) Enabled() bool {
+ t.mu.RLock()
+ defer t.mu.RUnlock()
+ return !t.mu.disabled
+}
+
+func (t *testInterface) setEnabled(v bool) {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ t.mu.disabled = !v
+}
+
+func (t *testInterface) LinkEndpoint() stack.LinkEndpoint {
+ return &t.tester
+}
+
+func TestEnableWhenNICDisabled(t *testing.T) {
+ tests := []struct {
+ name string
+ protocolFactory stack.NetworkProtocolFactory
+ protoNum tcpip.NetworkProtocolNumber
+ }{
+ {
+ name: "IPv4",
+ protocolFactory: ipv4.NewProtocol,
+ protoNum: ipv4.ProtocolNumber,
+ },
+ {
+ name: "IPv6",
+ protocolFactory: ipv6.NewProtocol,
+ protoNum: ipv6.ProtocolNumber,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ var nic testInterface
+ nic.setEnabled(false)
+
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{test.protocolFactory},
+ })
+ p := s.NetworkProtocolInstance(test.protoNum)
+
+ // We pass nil for all parameters except the NetworkInterface and Stack
+ // since Enable only depends on these.
+ ep := p.NewEndpoint(&nic, nil, nil, nil)
+
+ // The endpoint should initially be disabled, regardless the NIC's enabled
+ // status.
+ if ep.Enabled() {
+ t.Fatal("got ep.Enabled() = true, want = false")
+ }
+ nic.setEnabled(true)
+ if ep.Enabled() {
+ t.Fatal("got ep.Enabled() = true, want = false")
+ }
+
+ // Attempting to enable the endpoint while the NIC is disabled should
+ // fail.
+ nic.setEnabled(false)
+ if err := ep.Enable(); err != tcpip.ErrNotPermitted {
+ t.Fatalf("got ep.Enable() = %s, want = %s", err, tcpip.ErrNotPermitted)
+ }
+ // ep should consider the NIC's enabled status when determining its own
+ // enabled status so we "enable" the NIC to read just the endpoint's
+ // enabled status.
+ nic.setEnabled(true)
+ if ep.Enabled() {
+ t.Fatal("got ep.Enabled() = true, want = false")
+ }
+
+ // Enabling the interface after the NIC has been enabled should succeed.
+ if err := ep.Enable(); err != nil {
+ t.Fatalf("ep.Enable(): %s", err)
+ }
+ if !ep.Enabled() {
+ t.Fatal("got ep.Enabled() = false, want = true")
+ }
+
+ // ep should consider the NIC's enabled status when determining its own
+ // enabled status.
+ nic.setEnabled(false)
+ if ep.Enabled() {
+ t.Fatal("got ep.Enabled() = true, want = false")
+ }
+
+ // Disabling the endpoint when the NIC is enabled should make the endpoint
+ // disabled.
+ nic.setEnabled(true)
+ ep.Disable()
+ if ep.Enabled() {
+ t.Fatal("got ep.Enabled() = true, want = false")
+ }
+ })
+ }
+}
+
func TestIPv4Send(t *testing.T) {
- o := testObject{t: t, v4: true}
- proto := ipv4.NewProtocol()
- ep := proto.NewEndpoint(nicID, nil, nil, nil, &o, buildDummyStack(t))
+ s := buildDummyStack(t)
+ proto := s.NetworkProtocolInstance(ipv4.ProtocolNumber)
+ nic := testInterface{
+ tester: testObject{
+ t: t,
+ v4: true,
+ },
+ }
+ ep := proto.NewEndpoint(&nic, nil, nil, nil)
defer ep.Close()
// Allocate and initialize the payload view.
@@ -266,10 +395,10 @@ func TestIPv4Send(t *testing.T) {
})
// Issue the write.
- o.protocol = 123
- o.srcAddr = localIpv4Addr
- o.dstAddr = remoteIpv4Addr
- o.contents = payload
+ nic.tester.protocol = 123
+ nic.tester.srcAddr = localIpv4Addr
+ nic.tester.dstAddr = remoteIpv4Addr
+ nic.tester.contents = payload
r, err := buildIPv4Route(localIpv4Addr, remoteIpv4Addr)
if err != nil {
@@ -285,11 +414,21 @@ func TestIPv4Send(t *testing.T) {
}
func TestIPv4Receive(t *testing.T) {
- o := testObject{t: t, v4: true}
- proto := ipv4.NewProtocol()
- ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t))
+ s := buildDummyStack(t)
+ proto := s.NetworkProtocolInstance(ipv4.ProtocolNumber)
+ nic := testInterface{
+ tester: testObject{
+ t: t,
+ v4: true,
+ },
+ }
+ ep := proto.NewEndpoint(&nic, nil, nil, &nic.tester)
defer ep.Close()
+ if err := ep.Enable(); err != nil {
+ t.Fatalf("ep.Enable(): %s", err)
+ }
+
totalLen := header.IPv4MinimumSize + 30
view := buffer.NewView(totalLen)
ip := header.IPv4(view)
@@ -308,10 +447,10 @@ func TestIPv4Receive(t *testing.T) {
}
// Give packet to ipv4 endpoint, dispatcher will validate that it's ok.
- o.protocol = 10
- o.srcAddr = remoteIpv4Addr
- o.dstAddr = localIpv4Addr
- o.contents = view[header.IPv4MinimumSize:totalLen]
+ nic.tester.protocol = 10
+ nic.tester.srcAddr = remoteIpv4Addr
+ nic.tester.dstAddr = localIpv4Addr
+ nic.tester.contents = view[header.IPv4MinimumSize:totalLen]
r, err := buildIPv4Route(localIpv4Addr, remoteIpv4Addr)
if err != nil {
@@ -324,8 +463,8 @@ func TestIPv4Receive(t *testing.T) {
t.Fatalf("failed to parse packet: %x", pkt.Data.ToView())
}
ep.HandlePacket(&r, pkt)
- if o.dataCalls != 1 {
- t.Fatalf("Bad number of data calls: got %x, want 1", o.dataCalls)
+ if nic.tester.dataCalls != 1 {
+ t.Fatalf("Bad number of data calls: got %x, want 1", nic.tester.dataCalls)
}
}
@@ -355,11 +494,20 @@ func TestIPv4ReceiveControl(t *testing.T) {
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
- o := testObject{t: t}
- proto := ipv4.NewProtocol()
- ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t))
+ s := buildDummyStack(t)
+ proto := s.NetworkProtocolInstance(ipv4.ProtocolNumber)
+ nic := testInterface{
+ tester: testObject{
+ t: t,
+ },
+ }
+ ep := proto.NewEndpoint(&nic, nil, nil, &nic.tester)
defer ep.Close()
+ if err := ep.Enable(); err != nil {
+ t.Fatalf("ep.Enable(): %s", err)
+ }
+
const dataOffset = header.IPv4MinimumSize*2 + header.ICMPv4MinimumSize
view := buffer.NewView(dataOffset + 8)
@@ -400,27 +548,37 @@ func TestIPv4ReceiveControl(t *testing.T) {
// Give packet to IPv4 endpoint, dispatcher will validate that
// it's ok.
- o.protocol = 10
- o.srcAddr = remoteIpv4Addr
- o.dstAddr = localIpv4Addr
- o.contents = view[dataOffset:]
- o.typ = c.expectedTyp
- o.extra = c.expectedExtra
+ nic.tester.protocol = 10
+ nic.tester.srcAddr = remoteIpv4Addr
+ nic.tester.dstAddr = localIpv4Addr
+ nic.tester.contents = view[dataOffset:]
+ nic.tester.typ = c.expectedTyp
+ nic.tester.extra = c.expectedExtra
ep.HandlePacket(&r, truncatedPacket(view, c.trunc, header.IPv4MinimumSize))
- if want := c.expectedCount; o.controlCalls != want {
- t.Fatalf("Bad number of control calls for %q case: got %v, want %v", c.name, o.controlCalls, want)
+ if want := c.expectedCount; nic.tester.controlCalls != want {
+ t.Fatalf("Bad number of control calls for %q case: got %v, want %v", c.name, nic.tester.controlCalls, want)
}
})
}
}
func TestIPv4FragmentationReceive(t *testing.T) {
- o := testObject{t: t, v4: true}
- proto := ipv4.NewProtocol()
- ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t))
+ s := buildDummyStack(t)
+ proto := s.NetworkProtocolInstance(ipv4.ProtocolNumber)
+ nic := testInterface{
+ tester: testObject{
+ t: t,
+ v4: true,
+ },
+ }
+ ep := proto.NewEndpoint(&nic, nil, nil, &nic.tester)
defer ep.Close()
+ if err := ep.Enable(); err != nil {
+ t.Fatalf("ep.Enable(): %s", err)
+ }
+
totalLen := header.IPv4MinimumSize + 24
frag1 := buffer.NewView(totalLen)
@@ -457,10 +615,10 @@ func TestIPv4FragmentationReceive(t *testing.T) {
}
// Give packet to ipv4 endpoint, dispatcher will validate that it's ok.
- o.protocol = 10
- o.srcAddr = remoteIpv4Addr
- o.dstAddr = localIpv4Addr
- o.contents = append(frag1[header.IPv4MinimumSize:totalLen], frag2[header.IPv4MinimumSize:totalLen]...)
+ nic.tester.protocol = 10
+ nic.tester.srcAddr = remoteIpv4Addr
+ nic.tester.dstAddr = localIpv4Addr
+ nic.tester.contents = append(frag1[header.IPv4MinimumSize:totalLen], frag2[header.IPv4MinimumSize:totalLen]...)
r, err := buildIPv4Route(localIpv4Addr, remoteIpv4Addr)
if err != nil {
@@ -475,8 +633,8 @@ func TestIPv4FragmentationReceive(t *testing.T) {
t.Fatalf("failed to parse packet: %x", pkt.Data.ToView())
}
ep.HandlePacket(&r, pkt)
- if o.dataCalls != 0 {
- t.Fatalf("Bad number of data calls: got %x, want 0", o.dataCalls)
+ if nic.tester.dataCalls != 0 {
+ t.Fatalf("Bad number of data calls: got %x, want 0", nic.tester.dataCalls)
}
// Send second segment.
@@ -487,17 +645,26 @@ func TestIPv4FragmentationReceive(t *testing.T) {
t.Fatalf("failed to parse packet: %x", pkt.Data.ToView())
}
ep.HandlePacket(&r, pkt)
- if o.dataCalls != 1 {
- t.Fatalf("Bad number of data calls: got %x, want 1", o.dataCalls)
+ if nic.tester.dataCalls != 1 {
+ t.Fatalf("Bad number of data calls: got %x, want 1", nic.tester.dataCalls)
}
}
func TestIPv6Send(t *testing.T) {
- o := testObject{t: t}
- proto := ipv6.NewProtocol()
- ep := proto.NewEndpoint(nicID, nil, nil, &o, channel.New(0, 1280, ""), buildDummyStack(t))
+ s := buildDummyStack(t)
+ proto := s.NetworkProtocolInstance(ipv6.ProtocolNumber)
+ nic := testInterface{
+ tester: testObject{
+ t: t,
+ },
+ }
+ ep := proto.NewEndpoint(&nic, nil, nil, nil)
defer ep.Close()
+ if err := ep.Enable(); err != nil {
+ t.Fatalf("ep.Enable(): %s", err)
+ }
+
// Allocate and initialize the payload view.
payload := buffer.NewView(100)
for i := 0; i < len(payload); i++ {
@@ -511,10 +678,10 @@ func TestIPv6Send(t *testing.T) {
})
// Issue the write.
- o.protocol = 123
- o.srcAddr = localIpv6Addr
- o.dstAddr = remoteIpv6Addr
- o.contents = payload
+ nic.tester.protocol = 123
+ nic.tester.srcAddr = localIpv6Addr
+ nic.tester.dstAddr = remoteIpv6Addr
+ nic.tester.contents = payload
r, err := buildIPv6Route(localIpv6Addr, remoteIpv6Addr)
if err != nil {
@@ -530,11 +697,20 @@ func TestIPv6Send(t *testing.T) {
}
func TestIPv6Receive(t *testing.T) {
- o := testObject{t: t}
- proto := ipv6.NewProtocol()
- ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t))
+ s := buildDummyStack(t)
+ proto := s.NetworkProtocolInstance(ipv6.ProtocolNumber)
+ nic := testInterface{
+ tester: testObject{
+ t: t,
+ },
+ }
+ ep := proto.NewEndpoint(&nic, nil, nil, &nic.tester)
defer ep.Close()
+ if err := ep.Enable(); err != nil {
+ t.Fatalf("ep.Enable(): %s", err)
+ }
+
totalLen := header.IPv6MinimumSize + 30
view := buffer.NewView(totalLen)
ip := header.IPv6(view)
@@ -552,10 +728,10 @@ func TestIPv6Receive(t *testing.T) {
}
// Give packet to ipv6 endpoint, dispatcher will validate that it's ok.
- o.protocol = 10
- o.srcAddr = remoteIpv6Addr
- o.dstAddr = localIpv6Addr
- o.contents = view[header.IPv6MinimumSize:totalLen]
+ nic.tester.protocol = 10
+ nic.tester.srcAddr = remoteIpv6Addr
+ nic.tester.dstAddr = localIpv6Addr
+ nic.tester.contents = view[header.IPv6MinimumSize:totalLen]
r, err := buildIPv6Route(localIpv6Addr, remoteIpv6Addr)
if err != nil {
@@ -569,8 +745,8 @@ func TestIPv6Receive(t *testing.T) {
t.Fatalf("failed to parse packet: %x", pkt.Data.ToView())
}
ep.HandlePacket(&r, pkt)
- if o.dataCalls != 1 {
- t.Fatalf("Bad number of data calls: got %x, want 1", o.dataCalls)
+ if nic.tester.dataCalls != 1 {
+ t.Fatalf("Bad number of data calls: got %x, want 1", nic.tester.dataCalls)
}
}
@@ -609,11 +785,20 @@ func TestIPv6ReceiveControl(t *testing.T) {
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
- o := testObject{t: t}
- proto := ipv6.NewProtocol()
- ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t))
+ s := buildDummyStack(t)
+ proto := s.NetworkProtocolInstance(ipv6.ProtocolNumber)
+ nic := testInterface{
+ tester: testObject{
+ t: t,
+ },
+ }
+ ep := proto.NewEndpoint(&nic, nil, nil, &nic.tester)
defer ep.Close()
+ if err := ep.Enable(); err != nil {
+ t.Fatalf("ep.Enable(): %s", err)
+ }
+
dataOffset := header.IPv6MinimumSize*2 + header.ICMPv6MinimumSize
if c.fragmentOffset != nil {
dataOffset += header.IPv6FragmentHeaderSize
@@ -666,19 +851,19 @@ func TestIPv6ReceiveControl(t *testing.T) {
// Give packet to IPv6 endpoint, dispatcher will validate that
// it's ok.
- o.protocol = 10
- o.srcAddr = remoteIpv6Addr
- o.dstAddr = localIpv6Addr
- o.contents = view[dataOffset:]
- o.typ = c.expectedTyp
- o.extra = c.expectedExtra
+ nic.tester.protocol = 10
+ nic.tester.srcAddr = remoteIpv6Addr
+ nic.tester.dstAddr = localIpv6Addr
+ nic.tester.contents = view[dataOffset:]
+ nic.tester.typ = c.expectedTyp
+ nic.tester.extra = c.expectedExtra
// Set ICMPv6 checksum.
icmp.SetChecksum(header.ICMPv6Checksum(icmp, outerSrcAddr, localIpv6Addr, buffer.VectorisedView{}))
ep.HandlePacket(&r, truncatedPacket(view, c.trunc, header.IPv6MinimumSize))
- if want := c.expectedCount; o.controlCalls != want {
- t.Fatalf("Bad number of control calls for %q case: got %v, want %v", c.name, o.controlCalls, want)
+ if want := c.expectedCount; nic.tester.controlCalls != want {
+ t.Fatalf("Bad number of control calls for %q case: got %v, want %v", c.name, nic.tester.controlCalls, want)
}
})
}
diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD
index f9c2aa980..0a7e98ed1 100644
--- a/pkg/tcpip/network/ipv4/BUILD
+++ b/pkg/tcpip/network/ipv4/BUILD
@@ -10,6 +10,7 @@ go_library(
],
visibility = ["//visibility:public"],
deps = [
+ "//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/header",
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 5fe73315f..5c4f715d7 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -40,7 +40,7 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
// Drop packet if it doesn't have the basic IPv4 header or if the
// original source address doesn't match an address we own.
src := hdr.SourceAddress()
- if e.stack.CheckLocalAddress(e.NICID(), ProtocolNumber, src) == 0 {
+ if e.protocol.stack.CheckLocalAddress(e.nic.ID(), ProtocolNumber, src) == 0 {
return
}
@@ -110,7 +110,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
localAddr = ""
}
- r, err := r.Stack().FindRoute(e.NICID(), localAddr, r.RemoteAddress, ProtocolNumber, false /* multicastLoop */)
+ r, err := r.Stack().FindRoute(e.nic.ID(), localAddr, r.RemoteAddress, ProtocolNumber, false /* multicastLoop */)
if err != nil {
// If we cannot find a route to the destination, silently drop the packet.
return
@@ -200,16 +200,6 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
// ======= ICMP Error packet generation =========
-// ReturnError implements stack.TransportProtocol.ReturnError.
-func (p *protocol) ReturnError(r *stack.Route, reason tcpip.ICMPReason, pkt *stack.PacketBuffer) *tcpip.Error {
- switch reason.(type) {
- case *tcpip.ICMPReasonPortUnreachable:
- return returnError(r, &icmpReasonPortUnreachable{}, pkt)
- default:
- return tcpip.ErrNotSupported
- }
-}
-
// icmpReason is a marker interface for IPv4 specific ICMP errors.
type icmpReason interface {
isICMPReason()
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 135444222..cf0a76274 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -12,17 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package ipv4 contains the implementation of the ipv4 network protocol. To use
-// it in the networking stack, this package must be added to the project, and
-// activated on the stack by passing ipv4.NewProtocol() as one of the network
-// protocols when calling stack.New(). Then endpoints can be created by passing
-// ipv4.ProtocolNumber as the network protocol number when calling
-// Stack.NewEndpoint().
+// Package ipv4 contains the implementation of the ipv4 network protocol.
package ipv4
import (
+ "fmt"
"sync/atomic"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -51,22 +48,115 @@ const (
fragmentblockSize = 8
)
+var ipv4BroadcastAddr = header.IPv4Broadcast.WithPrefix()
+
+var _ stack.GroupAddressableEndpoint = (*endpoint)(nil)
+var _ stack.AddressableEndpoint = (*endpoint)(nil)
+var _ stack.NetworkEndpoint = (*endpoint)(nil)
+
type endpoint struct {
- nicID tcpip.NICID
+ nic stack.NetworkInterface
linkEP stack.LinkEndpoint
dispatcher stack.TransportDispatcher
protocol *protocol
- stack *stack.Stack
+
+ // enabled is set to 1 when the enpoint is enabled and 0 when it is
+ // disabled.
+ //
+ // Must be accessed using atomic operations.
+ enabled uint32
+
+ mu struct {
+ sync.RWMutex
+
+ addressableEndpointState stack.AddressableEndpointState
+ }
}
// NewEndpoint creates a new ipv4 endpoint.
-func (p *protocol) NewEndpoint(nicID tcpip.NICID, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
- return &endpoint{
- nicID: nicID,
- linkEP: linkEP,
+func (p *protocol) NewEndpoint(nic stack.NetworkInterface, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
+ e := &endpoint{
+ nic: nic,
+ linkEP: nic.LinkEndpoint(),
dispatcher: dispatcher,
protocol: p,
- stack: st,
+ }
+ e.mu.addressableEndpointState.Init(e)
+ return e
+}
+
+// Enable implements stack.NetworkEndpoint.
+func (e *endpoint) Enable() *tcpip.Error {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+
+ // If the NIC is not enabled, the endpoint can't do anything meaningful so
+ // don't enable the endpoint.
+ if !e.nic.Enabled() {
+ return tcpip.ErrNotPermitted
+ }
+
+ // If the endpoint is already enabled, there is nothing for it to do.
+ if !e.setEnabled(true) {
+ return nil
+ }
+
+ // Create an endpoint to receive broadcast packets on this interface.
+ ep, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(ipv4BroadcastAddr, stack.NeverPrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */)
+ if err != nil {
+ return err
+ }
+ // We have no need for the address endpoint.
+ ep.DecRef()
+
+ // As per RFC 1122 section 3.3.7, all hosts should join the all-hosts
+ // multicast group. Note, the IANA calls the all-hosts multicast group the
+ // all-systems multicast group.
+ _, err = e.mu.addressableEndpointState.JoinGroup(header.IPv4AllSystems)
+ return err
+}
+
+// Enabled implements stack.NetworkEndpoint.
+func (e *endpoint) Enabled() bool {
+ return e.nic.Enabled() && e.isEnabled()
+}
+
+// isEnabled returns true if the endpoint is enabled, regardless of the
+// enabled status of the NIC.
+func (e *endpoint) isEnabled() bool {
+ return atomic.LoadUint32(&e.enabled) == 1
+}
+
+// setEnabled sets the enabled status for the endpoint.
+//
+// Returns true if the enabled status was updated.
+func (e *endpoint) setEnabled(v bool) bool {
+ if v {
+ return atomic.SwapUint32(&e.enabled, 1) == 0
+ }
+ return atomic.SwapUint32(&e.enabled, 0) == 1
+}
+
+// Disable implements stack.NetworkEndpoint.
+func (e *endpoint) Disable() {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ e.disableLocked()
+}
+
+func (e *endpoint) disableLocked() {
+ if !e.setEnabled(false) {
+ return
+ }
+
+ // The endpoint may have already left the multicast group.
+ if _, err := e.mu.addressableEndpointState.LeaveGroup(header.IPv4AllSystems); err != nil && err != tcpip.ErrBadLocalAddress {
+ panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err))
+ }
+
+ // The address may have already been removed.
+ if err := e.mu.addressableEndpointState.RemovePermanentAddress(ipv4BroadcastAddr.Address); err != nil && err != tcpip.ErrBadLocalAddress {
+ panic(fmt.Sprintf("unexpected error when removing address = %s: %s", ipv4BroadcastAddr.Address, err))
}
}
@@ -81,16 +171,6 @@ func (e *endpoint) MTU() uint32 {
return calculateMTU(e.linkEP.MTU())
}
-// Capabilities implements stack.NetworkEndpoint.Capabilities.
-func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities {
- return e.linkEP.Capabilities()
-}
-
-// NICID returns the ID of the NIC this endpoint belongs to.
-func (e *endpoint) NICID() tcpip.NICID {
- return e.nicID
-}
-
// MaxHeaderLength returns the maximum length needed by ipv4 headers (and
// underlying protocols).
func (e *endpoint) MaxHeaderLength() uint16 {
@@ -232,8 +312,8 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
// iptables filtering. All packets that reach here are locally
// generated.
- nicName := e.stack.FindNICNameFromID(e.NICID())
- ipt := e.stack.IPTables()
+ nicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
+ ipt := e.protocol.stack.IPTables()
if ok := ipt.Check(stack.Output, pkt, gso, r, "", nicName); !ok {
// iptables is telling us to drop the packet.
r.Stats().IP.IPTablesOutputDropped.Increment()
@@ -249,7 +329,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
// short circuits broadcasts before they are sent out to other hosts.
if pkt.NatDone {
netHeader := header.IPv4(pkt.NetworkHeader().View())
- ep, err := e.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress())
+ ep, err := e.protocol.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress())
if err == nil {
route := r.ReverseRoute(netHeader.SourceAddress(), netHeader.DestinationAddress())
ep.HandlePacket(&route, pkt)
@@ -289,10 +369,10 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
pkt = pkt.Next()
}
- nicName := e.stack.FindNICNameFromID(e.NICID())
+ nicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
// iptables filtering. All packets that reach here are locally
// generated.
- ipt := e.stack.IPTables()
+ ipt := e.protocol.stack.IPTables()
dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r, nicName)
if len(dropped) == 0 && len(natPkts) == 0 {
// Fast path: If no packets are to be dropped then we can just invoke the
@@ -312,7 +392,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
}
if _, ok := natPkts[pkt]; ok {
netHeader := header.IPv4(pkt.NetworkHeader().View())
- if ep, err := e.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress()); err == nil {
+ if ep, err := e.protocol.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress()); err == nil {
src := netHeader.SourceAddress()
dst := netHeader.DestinationAddress()
route := r.ReverseRoute(src, dst)
@@ -389,6 +469,10 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
// HandlePacket is called by the link layer when new ipv4 packets arrive for
// this endpoint.
func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
+ if !e.isEnabled() {
+ return
+ }
+
h := header.IPv4(pkt.NetworkHeader().View())
if !h.IsValid(pkt.Data.Size() + pkt.NetworkHeader().View().Size() + pkt.TransportHeader().View().Size()) {
r.Stats().IP.MalformedPacketsReceived.Increment()
@@ -397,7 +481,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
// iptables filtering. All packets that reach here are intended for
// this machine and will not be forwarded.
- ipt := e.stack.IPTables()
+ ipt := e.protocol.stack.IPTables()
if ok := ipt.Check(stack.Input, pkt, nil, nil, "", ""); !ok {
// iptables is telling us to drop the packet.
r.Stats().IP.IPTablesInputDropped.Increment()
@@ -463,21 +547,141 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
return
}
r.Stats().IP.PacketsDelivered.Increment()
- e.dispatcher.DeliverTransportPacket(r, p, pkt)
+
+ switch res := e.dispatcher.DeliverTransportPacket(r, p, pkt); res {
+ case stack.TransportPacketHandled:
+ case stack.TransportPacketDestinationPortUnreachable:
+ // As per RFC: 1122 Section 3.2.2.1 A host SHOULD generate Destination
+ // Unreachable messages with code:
+ // 3 (Port Unreachable), when the designated transport protocol
+ // (e.g., UDP) is unable to demultiplex the datagram but has no
+ // protocol mechanism to inform the sender.
+ _ = returnError(r, &icmpReasonPortUnreachable{}, pkt)
+ default:
+ panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
+ }
}
// Close cleans up resources associated with the endpoint.
-func (e *endpoint) Close() {}
+func (e *endpoint) Close() {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+
+ e.disableLocked()
+ e.mu.addressableEndpointState.Cleanup()
+}
+
+// AddAndAcquirePermanentAddress implements stack.AddressableEndpoint.
+func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, *tcpip.Error) {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ return e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, peb, configType, deprecated)
+}
+
+// RemovePermanentAddress implements stack.AddressableEndpoint.
+func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) *tcpip.Error {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ return e.mu.addressableEndpointState.RemovePermanentAddress(addr)
+}
+
+// AcquireAssignedAddress implements stack.AddressableEndpoint.
+func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+
+ loopback := e.nic.IsLoopback()
+ addressEndpoint := e.mu.addressableEndpointState.ReadOnly().AddrOrMatching(localAddr, allowTemp, func(addressEndpoint stack.AddressEndpoint) bool {
+ subnet := addressEndpoint.AddressWithPrefix().Subnet()
+ // IPv4 has a notion of a subnet broadcast address and considers the
+ // loopback interface bound to an address's whole subnet (on linux).
+ return subnet.IsBroadcast(localAddr) || (loopback && subnet.Contains(localAddr))
+ })
+ if addressEndpoint != nil {
+ return addressEndpoint
+ }
+
+ if !allowTemp {
+ return nil
+ }
+
+ addr := localAddr.WithPrefix()
+ addressEndpoint, err := e.mu.addressableEndpointState.AddAndAcquireTemporaryAddress(addr, tempPEB)
+ if err != nil {
+ // AddAddress only returns an error if the address is already assigned,
+ // but we just checked above if the address exists so we expect no error.
+ panic(fmt.Sprintf("e.mu.addressableEndpointState.AddAndAcquireTemporaryAddress(%s, %d): %s", addr, tempPEB, err))
+ }
+ return addressEndpoint
+}
+
+// AcquirePrimaryAddress implements stack.AddressableEndpoint.
+func (e *endpoint) AcquirePrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
+ e.mu.RLock()
+ defer e.mu.RUnlock()
+ return e.mu.addressableEndpointState.AcquirePrimaryAddress(remoteAddr, allowExpired)
+}
+
+// PrimaryAddresses implements stack.AddressableEndpoint.
+func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix {
+ e.mu.RLock()
+ defer e.mu.RUnlock()
+ return e.mu.addressableEndpointState.PrimaryAddresses()
+}
+
+// PermanentAddresses implements stack.AddressableEndpoint.
+func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix {
+ e.mu.RLock()
+ defer e.mu.RUnlock()
+ return e.mu.addressableEndpointState.PermanentAddresses()
+}
+
+// JoinGroup implements stack.GroupAddressableEndpoint.
+func (e *endpoint) JoinGroup(addr tcpip.Address) (bool, *tcpip.Error) {
+ if !header.IsV4MulticastAddress(addr) {
+ return false, tcpip.ErrBadAddress
+ }
+
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ return e.mu.addressableEndpointState.JoinGroup(addr)
+}
+
+// LeaveGroup implements stack.GroupAddressableEndpoint.
+func (e *endpoint) LeaveGroup(addr tcpip.Address) (bool, *tcpip.Error) {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ return e.mu.addressableEndpointState.LeaveGroup(addr)
+}
+
+// IsInGroup implements stack.GroupAddressableEndpoint.
+func (e *endpoint) IsInGroup(addr tcpip.Address) bool {
+ e.mu.RLock()
+ defer e.mu.RUnlock()
+ return e.mu.addressableEndpointState.IsInGroup(addr)
+}
+
+var _ stack.ForwardingNetworkProtocol = (*protocol)(nil)
+var _ stack.NetworkProtocol = (*protocol)(nil)
type protocol struct {
- ids []uint32
- hashIV uint32
+ stack *stack.Stack
// defaultTTL is the current default TTL for the protocol. Only the
- // uint8 portion of it is meaningful and it must be accessed
- // atomically.
+ // uint8 portion of it is meaningful.
+ //
+ // Must be accessed using atomic operations.
defaultTTL uint32
+ // forwarding is set to 1 when the protocol has forwarding enabled and 0
+ // when it is disabled.
+ //
+ // Must be accessed using atomic operations.
+ forwarding uint32
+
+ ids []uint32
+ hashIV uint32
+
fragmentation *fragmentation.Fragmentation
}
@@ -550,6 +754,20 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNu
return ipHdr.TransportProtocol(), !ipHdr.More() && ipHdr.FragmentOffset() == 0, true
}
+// Forwarding implements stack.ForwardingNetworkProtocol.
+func (p *protocol) Forwarding() bool {
+ return uint8(atomic.LoadUint32(&p.forwarding)) == 1
+}
+
+// SetForwarding implements stack.ForwardingNetworkProtocol.
+func (p *protocol) SetForwarding(v bool) {
+ if v {
+ atomic.StoreUint32(&p.forwarding, 1)
+ } else {
+ atomic.StoreUint32(&p.forwarding, 0)
+ }
+}
+
// calculateMTU calculates the network-layer payload MTU based on the link-layer
// payload mtu.
func calculateMTU(mtu uint32) uint32 {
@@ -571,7 +789,7 @@ func hashRoute(r *stack.Route, protocol tcpip.TransportProtocolNumber, hashIV ui
}
// NewProtocol returns an IPv4 network protocol.
-func NewProtocol() stack.NetworkProtocol {
+func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
ids := make([]uint32, buckets)
// Randomly initialize hashIV and the ids.
@@ -582,9 +800,10 @@ func NewProtocol() stack.NetworkProtocol {
hashIV := r[buckets]
return &protocol{
+ stack: s,
ids: ids,
hashIV: hashIV,
defaultTTL: DefaultTTL,
- fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout),
+ fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout, s.Clock()),
}
}
diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go
index 86187aba8..0b3ed9483 100644
--- a/pkg/tcpip/network/ipv4/ipv4_test.go
+++ b/pkg/tcpip/network/ipv4/ipv4_test.go
@@ -36,8 +36,8 @@ import (
func TestExcludeBroadcast(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
const defaultMTU = 65536
@@ -517,8 +517,8 @@ func TestInvalidFragments(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{
- ipv4.NewProtocol(),
+ NetworkProtocols: []stack.NetworkProtocolFactory{
+ ipv4.NewProtocol,
},
})
e := channel.New(0, 1500, linkAddr)
@@ -929,8 +929,8 @@ func TestReceiveFragments(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
// Setup a stack and endpoint.
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
e := channel.New(0, 1280, tcpip.LinkAddress("\xf0\x00"))
if err := s.CreateNIC(nicID, e); err != nil {
@@ -1140,7 +1140,7 @@ func TestWriteStats(t *testing.T) {
func buildRoute(t *testing.T, ep stack.LinkEndpoint) stack.Route {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
})
if err := s.CreateNIC(1, ep); err != nil {
t.Fatalf("CreateNIC(1, _) failed: %s", err)
diff --git a/pkg/tcpip/network/ipv6/BUILD b/pkg/tcpip/network/ipv6/BUILD
index 8bd8f5c52..97adbcbd4 100644
--- a/pkg/tcpip/network/ipv6/BUILD
+++ b/pkg/tcpip/network/ipv6/BUILD
@@ -5,11 +5,14 @@ package(licenses = ["notice"])
go_library(
name = "ipv6",
srcs = [
+ "dhcpv6configurationfromndpra_string.go",
"icmp.go",
"ipv6.go",
+ "ndp.go",
],
visibility = ["//visibility:public"],
deps = [
+ "//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/header",
diff --git a/pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go b/pkg/tcpip/network/ipv6/dhcpv6configurationfromndpra_string.go
index d199ded6a..09ba133b1 100644
--- a/pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go
+++ b/pkg/tcpip/network/ipv6/dhcpv6configurationfromndpra_string.go
@@ -14,7 +14,7 @@
// Code generated by "stringer -type DHCPv6ConfigurationFromNDPRA"; DO NOT EDIT.
-package stack
+package ipv6
import "strconv"
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index 072c8ccd7..4b4b483cc 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -15,6 +15,8 @@
package ipv6
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -39,7 +41,7 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
// Drop packet if it doesn't have the basic IPv6 header or if the
// original source address doesn't match an address we own.
src := hdr.SourceAddress()
- if e.stack.CheckLocalAddress(e.NICID(), ProtocolNumber, src) == 0 {
+ if e.protocol.stack.CheckLocalAddress(e.nic.ID(), ProtocolNumber, src) == 0 {
return
}
@@ -207,14 +209,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
return
}
- s := r.Stack()
- if isTentative, err := s.IsAddrTentative(e.nicID, targetAddr); err != nil {
- // We will only get an error if the NIC is unrecognized, which should not
- // happen. For now, drop this packet.
- //
- // TODO(b/141002840): Handle this better?
- return
- } else if isTentative {
+ if e.hasTentativeAddr(targetAddr) {
// If the target address is tentative and the source of the packet is a
// unicast (specified) address, then the source of the packet is
// attempting to perform address resolution on the target. In this case,
@@ -227,7 +222,20 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
// stack know so it can handle such a scenario and do nothing further with
// the NS.
if r.RemoteAddress == header.IPv6Any {
- s.DupTentativeAddrDetected(e.nicID, targetAddr)
+ // We would get an error if the address no longer exists or the address
+ // is no longer tentative (DAD resolved between the call to
+ // hasTentativeAddr and this point). Both of these are valid scenarios:
+ // 1) An address may be removed at any time.
+ // 2) As per RFC 4862 section 5.4, DAD is not a perfect:
+ // "Note that the method for detecting duplicates
+ // is not completely reliable, and it is possible that duplicate
+ // addresses will still exist"
+ //
+ // TODO(gvisor.dev/issue/4046): Handle the scenario when a duplicate
+ // address is detected for an assigned address.
+ if err := e.dupTentativeAddrDetected(targetAddr); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState {
+ panic(fmt.Sprintf("unexpected error handling duplicate tentative address: %s", err))
+ }
}
// Do not handle neighbor solicitations targeted to an address that is
@@ -240,7 +248,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
// section 5.4.3.
// Is the NS targeting us?
- if s.CheckLocalAddress(e.nicID, ProtocolNumber, targetAddr) == 0 {
+ if r.Stack().CheckLocalAddress(e.nic.ID(), ProtocolNumber, targetAddr) == 0 {
return
}
@@ -275,7 +283,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
} else if e.nud != nil {
e.nud.HandleProbe(r.RemoteAddress, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
} else {
- e.linkAddrCache.AddLinkAddress(e.nicID, r.RemoteAddress, sourceLinkAddr)
+ e.linkAddrCache.AddLinkAddress(e.nic.ID(), r.RemoteAddress, sourceLinkAddr)
}
// ICMPv6 Neighbor Solicit messages are always sent to
@@ -353,20 +361,26 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
// NDP datagrams are very small and ToView() will not incur allocations.
na := header.NDPNeighborAdvert(payload.ToView())
targetAddr := na.TargetAddress()
- s := r.Stack()
-
- if isTentative, err := s.IsAddrTentative(e.nicID, targetAddr); err != nil {
- // We will only get an error if the NIC is unrecognized, which should not
- // happen. For now short-circuit this packet.
- //
- // TODO(b/141002840): Handle this better?
- return
- } else if isTentative {
+ if e.hasTentativeAddr(targetAddr) {
// We just got an NA from a node that owns an address we are performing
// DAD on, implying the address is not unique. In this case we let the
// stack know so it can handle such a scenario and do nothing furthur with
// the NDP NA.
- s.DupTentativeAddrDetected(e.nicID, targetAddr)
+ //
+ // We would get an error if the address no longer exists or the address
+ // is no longer tentative (DAD resolved between the call to
+ // hasTentativeAddr and this point). Both of these are valid scenarios:
+ // 1) An address may be removed at any time.
+ // 2) As per RFC 4862 section 5.4, DAD is not a perfect:
+ // "Note that the method for detecting duplicates
+ // is not completely reliable, and it is possible that duplicate
+ // addresses will still exist"
+ //
+ // TODO(gvisor.dev/issue/4046): Handle the scenario when a duplicate
+ // address is detected for an assigned address.
+ if err := e.dupTentativeAddrDetected(targetAddr); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState {
+ panic(fmt.Sprintf("unexpected error handling duplicate tentative address: %s", err))
+ }
return
}
@@ -396,7 +410,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
// address cache with the link address for the target of the message.
if len(targetLinkAddr) != 0 {
if e.nud == nil {
- e.linkAddrCache.AddLinkAddress(e.nicID, targetAddr, targetLinkAddr)
+ e.linkAddrCache.AddLinkAddress(e.nic.ID(), targetAddr, targetLinkAddr)
return
}
@@ -424,7 +438,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
localAddr = ""
}
- r, err := r.Stack().FindRoute(e.NICID(), localAddr, r.RemoteAddress, ProtocolNumber, false /* multicastLoop */)
+ r, err := r.Stack().FindRoute(e.nic.ID(), localAddr, r.RemoteAddress, ProtocolNumber, false /* multicastLoop */)
if err != nil {
// If we cannot find a route to the destination, silently drop the packet.
return
@@ -568,9 +582,9 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
e.nud.HandleProbe(routerAddr, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
}
- // Tell the NIC to handle the RA.
- stack := r.Stack()
- stack.HandleNDPRA(e.nicID, routerAddr, ra)
+ e.mu.Lock()
+ e.mu.ndp.handleRA(routerAddr, ra)
+ e.mu.Unlock()
case header.ICMPv6RedirectMsg:
// TODO(gvisor.dev/issue/2285): Call `e.nud.HandleProbe` after validating
@@ -671,16 +685,6 @@ func (*protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bo
// ======= ICMP Error packet generation =========
-// ReturnError implements stack.TransportProtocol.ReturnError.
-func (p *protocol) ReturnError(r *stack.Route, reason tcpip.ICMPReason, pkt *stack.PacketBuffer) *tcpip.Error {
- switch reason.(type) {
- case *tcpip.ICMPReasonPortUnreachable:
- return returnError(r, &icmpReasonPortUnreachable{}, pkt)
- default:
- return tcpip.ErrNotSupported
- }
-}
-
// icmpReason is a marker interface for IPv6 specific ICMP errors.
type icmpReason interface {
isICMPReason()
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index 0f50bfb8e..31370c1d4 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -75,7 +75,8 @@ type stubDispatcher struct {
stack.TransportDispatcher
}
-func (*stubDispatcher) DeliverTransportPacket(*stack.Route, tcpip.TransportProtocolNumber, *stack.PacketBuffer) {
+func (*stubDispatcher) DeliverTransportPacket(*stack.Route, tcpip.TransportProtocolNumber, *stack.PacketBuffer) stack.TransportPacketDisposition {
+ return stack.TransportPacketHandled
}
type stubLinkAddressCache struct {
@@ -102,6 +103,30 @@ func (*stubNUDHandler) HandleConfirmation(addr tcpip.Address, linkAddr tcpip.Lin
func (*stubNUDHandler) HandleUpperLevelConfirmation(addr tcpip.Address) {
}
+var _ stack.NetworkInterface = (*testInterface)(nil)
+
+type testInterface struct{}
+
+func (*testInterface) ID() tcpip.NICID {
+ return 0
+}
+
+func (*testInterface) IsLoopback() bool {
+ return false
+}
+
+func (*testInterface) Name() string {
+ return ""
+}
+
+func (*testInterface) Enabled() bool {
+ return true
+}
+
+func (*testInterface) LinkEndpoint() stack.LinkEndpoint {
+ return nil
+}
+
func TestICMPCounts(t *testing.T) {
tests := []struct {
name string
@@ -120,8 +145,8 @@ func TestICMPCounts(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol6},
UseNeighborCache: test.useNeighborCache,
})
{
@@ -149,9 +174,13 @@ func TestICMPCounts(t *testing.T) {
if netProto == nil {
t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber)
}
- ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubNUDHandler{}, &stubDispatcher{}, nil, s)
+ ep := netProto.NewEndpoint(&testInterface{}, &stubLinkAddressCache{}, &stubNUDHandler{}, &stubDispatcher{})
defer ep.Close()
+ if err := ep.Enable(); err != nil {
+ t.Fatalf("ep.Enable(): %s", err)
+ }
+
r, err := s.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
if err != nil {
t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err)
@@ -258,8 +287,8 @@ func TestICMPCounts(t *testing.T) {
func TestICMPCountsWithNeighborCache(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol6},
UseNeighborCache: true,
})
{
@@ -287,9 +316,13 @@ func TestICMPCountsWithNeighborCache(t *testing.T) {
if netProto == nil {
t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber)
}
- ep := netProto.NewEndpoint(0, nil, &stubNUDHandler{}, &stubDispatcher{}, nil, s)
+ ep := netProto.NewEndpoint(&testInterface{}, nil, &stubNUDHandler{}, &stubDispatcher{})
defer ep.Close()
+ if err := ep.Enable(); err != nil {
+ t.Fatalf("ep.Enable(): %s", err)
+ }
+
r, err := s.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
if err != nil {
t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err)
@@ -423,12 +456,12 @@ func (e endpointWithResolutionCapability) Capabilities() stack.LinkEndpointCapab
func newTestContext(t *testing.T) *testContext {
c := &testContext{
s0: stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol6},
}),
s1: stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol6},
}),
}
@@ -723,7 +756,7 @@ func TestICMPChecksumValidationSimple(t *testing.T) {
e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
UseNeighborCache: test.useNeighborCache,
})
if isRouter {
@@ -919,7 +952,7 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) {
t.Run(typ.name, func(t *testing.T) {
e := channel.New(10, 1280, linkAddr0)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
})
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(_, _) = %s", err)
@@ -1097,7 +1130,7 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
t.Run(typ.name, func(t *testing.T) {
e := channel.New(10, 1280, linkAddr0)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
})
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -1203,7 +1236,10 @@ func TestLinkAddressRequest(t *testing.T) {
}
for _, test := range tests {
- p := NewProtocol()
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ })
+ p := s.NetworkProtocolInstance(ProtocolNumber)
linkRes, ok := p.(stack.LinkAddressResolver)
if !ok {
t.Fatalf("expected IPv6 protocol to implement stack.LinkAddressResolver")
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 5b1cca180..990e67210 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -12,18 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package ipv6 contains the implementation of the ipv6 network protocol. To use
-// it in the networking stack, this package must be added to the project, and
-// activated on the stack by passing ipv6.NewProtocol() as one of the network
-// protocols when calling stack.New(). Then endpoints can be created by passing
-// ipv6.ProtocolNumber as the network protocol number when calling
-// Stack.NewEndpoint().
+// Package ipv6 contains the implementation of the ipv6 network protocol.
package ipv6
import (
"fmt"
+ "sort"
"sync/atomic"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -45,14 +42,302 @@ const (
DefaultTTL = 64
)
+var _ stack.GroupAddressableEndpoint = (*endpoint)(nil)
+var _ stack.AddressableEndpoint = (*endpoint)(nil)
+var _ stack.NetworkEndpoint = (*endpoint)(nil)
+var _ stack.NDPEndpoint = (*endpoint)(nil)
+var _ NDPEndpoint = (*endpoint)(nil)
+
type endpoint struct {
- nicID tcpip.NICID
+ nic stack.NetworkInterface
linkEP stack.LinkEndpoint
linkAddrCache stack.LinkAddressCache
nud stack.NUDHandler
dispatcher stack.TransportDispatcher
protocol *protocol
stack *stack.Stack
+
+ // enabled is set to 1 when the endpoint is enabled and 0 when it is
+ // disabled.
+ //
+ // Must be accessed using atomic operations.
+ enabled uint32
+
+ mu struct {
+ sync.RWMutex
+
+ addressableEndpointState stack.AddressableEndpointState
+ ndp ndpState
+ }
+}
+
+// NICNameFromID is a function that returns a stable name for the specified NIC,
+// even if different NIC IDs are used to refer to the same NIC in different
+// program runs. It is used when generating opaque interface identifiers (IIDs).
+// If the NIC was created with a name, it is passed to NICNameFromID.
+//
+// NICNameFromID SHOULD return unique NIC names so unique opaque IIDs are
+// generated for the same prefix on differnt NICs.
+type NICNameFromID func(tcpip.NICID, string) string
+
+// OpaqueInterfaceIdentifierOptions holds the options related to the generation
+// of opaque interface indentifiers (IIDs) as defined by RFC 7217.
+type OpaqueInterfaceIdentifierOptions struct {
+ // NICNameFromID is a function that returns a stable name for a specified NIC,
+ // even if the NIC ID changes over time.
+ //
+ // Must be specified to generate the opaque IID.
+ NICNameFromID NICNameFromID
+
+ // SecretKey is a pseudo-random number used as the secret key when generating
+ // opaque IIDs as defined by RFC 7217. The key SHOULD be at least
+ // header.OpaqueIIDSecretKeyMinBytes bytes and MUST follow minimum randomness
+ // requirements for security as outlined by RFC 4086. SecretKey MUST NOT
+ // change between program runs, unless explicitly changed.
+ //
+ // OpaqueInterfaceIdentifierOptions takes ownership of SecretKey. SecretKey
+ // MUST NOT be modified after Stack is created.
+ //
+ // May be nil, but a nil value is highly discouraged to maintain
+ // some level of randomness between nodes.
+ SecretKey []byte
+}
+
+// InvalidateDefaultRouter implements stack.NDPEndpoint.
+func (e *endpoint) InvalidateDefaultRouter(rtr tcpip.Address) {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ e.mu.ndp.invalidateDefaultRouter(rtr)
+}
+
+// SetNDPConfigurations implements NDPEndpoint.
+func (e *endpoint) SetNDPConfigurations(c NDPConfigurations) {
+ c.validate()
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ e.mu.ndp.configs = c
+}
+
+// hasTentativeAddr returns true if addr is tentative on e.
+func (e *endpoint) hasTentativeAddr(addr tcpip.Address) bool {
+ e.mu.RLock()
+ addressEndpoint := e.getAddressRLocked(addr)
+ e.mu.RUnlock()
+ return addressEndpoint != nil && addressEndpoint.GetKind() == stack.PermanentTentative
+}
+
+// dupTentativeAddrDetected attempts to inform e that a tentative addr is a
+// duplicate on a link.
+//
+// dupTentativeAddrDetected removes the tentative address if it exists. If the
+// address was generated via SLAAC, an attempt is made to generate a new
+// address.
+func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address) *tcpip.Error {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+
+ addressEndpoint := e.getAddressRLocked(addr)
+ if addressEndpoint == nil {
+ return tcpip.ErrBadAddress
+ }
+
+ if addressEndpoint.GetKind() != stack.PermanentTentative {
+ return tcpip.ErrInvalidEndpointState
+ }
+
+ // If the address is a SLAAC address, do not invalidate its SLAAC prefix as an
+ // attempt will be made to generate a new address for it.
+ if err := e.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */); err != nil {
+ return err
+ }
+
+ prefix := addressEndpoint.AddressWithPrefix().Subnet()
+
+ switch t := addressEndpoint.ConfigType(); t {
+ case stack.AddressConfigStatic:
+ case stack.AddressConfigSlaac:
+ e.mu.ndp.regenerateSLAACAddr(prefix)
+ case stack.AddressConfigSlaacTemp:
+ // Do not reset the generation attempts counter for the prefix as the
+ // temporary address is being regenerated in response to a DAD conflict.
+ e.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */)
+ default:
+ panic(fmt.Sprintf("unrecognized address config type = %d", t))
+ }
+
+ return nil
+}
+
+// transitionForwarding transitions the endpoint's forwarding status to
+// forwarding.
+//
+// Must only be called when the forwarding status changes.
+func (e *endpoint) transitionForwarding(forwarding bool) {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+
+ if !e.Enabled() {
+ return
+ }
+
+ if forwarding {
+ // When transitioning into an IPv6 router, host-only state (NDP discovered
+ // routers, discovered on-link prefixes, and auto-generated addresses) is
+ // cleaned up/invalidated and NDP router solicitations are stopped.
+ e.mu.ndp.stopSolicitingRouters()
+ e.mu.ndp.cleanupState(true /* hostOnly */)
+ } else {
+ // When transitioning into an IPv6 host, NDP router solicitations are
+ // started.
+ e.mu.ndp.startSolicitingRouters()
+ }
+}
+
+// Enable implements stack.NetworkEndpoint.
+func (e *endpoint) Enable() *tcpip.Error {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+
+ // If the NIC is not enabled, the endpoint can't do anything meaningful so
+ // don't enable the endpoint.
+ if !e.nic.Enabled() {
+ return tcpip.ErrNotPermitted
+ }
+
+ // If the endpoint is already enabled, there is nothing for it to do.
+ if !e.setEnabled(true) {
+ return nil
+ }
+
+ // Join the IPv6 All-Nodes Multicast group if the stack is configured to
+ // use IPv6. This is required to ensure that this node properly receives
+ // and responds to the various NDP messages that are destined to the
+ // all-nodes multicast address. An example is the Neighbor Advertisement
+ // when we perform Duplicate Address Detection, or Router Advertisement
+ // when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861
+ // section 4.2 for more information.
+ //
+ // Also auto-generate an IPv6 link-local address based on the endpoint's
+ // link address if it is configured to do so. Note, each interface is
+ // required to have IPv6 link-local unicast address, as per RFC 4291
+ // section 2.1.
+
+ // Join the All-Nodes multicast group before starting DAD as responses to DAD
+ // (NDP NS) messages may be sent to the All-Nodes multicast group if the
+ // source address of the NDP NS is the unspecified address, as per RFC 4861
+ // section 7.2.4.
+ if _, err := e.mu.addressableEndpointState.JoinGroup(header.IPv6AllNodesMulticastAddress); err != nil {
+ return err
+ }
+
+ // Perform DAD on the all the unicast IPv6 endpoints that are in the permanent
+ // state.
+ //
+ // Addresses may have aleady completed DAD but in the time since the endpoint
+ // was last enabled, other devices may have acquired the same addresses.
+ var err *tcpip.Error
+ e.mu.addressableEndpointState.ReadOnly().ForEach(func(addressEndpoint stack.AddressEndpoint) bool {
+ addr := addressEndpoint.AddressWithPrefix().Address
+ if !header.IsV6UnicastAddress(addr) {
+ return true
+ }
+
+ switch addressEndpoint.GetKind() {
+ case stack.Permanent:
+ addressEndpoint.SetKind(stack.PermanentTentative)
+ fallthrough
+ case stack.PermanentTentative:
+ err = e.mu.ndp.startDuplicateAddressDetection(addr, addressEndpoint)
+ return err == nil
+ default:
+ return true
+ }
+ })
+ if err != nil {
+ return err
+ }
+
+ // Do not auto-generate an IPv6 link-local address for loopback devices.
+ if e.protocol.autoGenIPv6LinkLocal {
+ // The valid and preferred lifetime is infinite for the auto-generated
+ // link-local address.
+ e.mu.ndp.doSLAAC(header.IPv6LinkLocalPrefix.Subnet(), header.NDPInfiniteLifetime, header.NDPInfiniteLifetime)
+ }
+
+ // If we are operating as a router, then do not solicit routers since we
+ // won't process the RAs anyway.
+ //
+ // Routers do not process Router Advertisements (RA) the same way a host
+ // does. That is, routers do not learn from RAs (e.g. on-link prefixes
+ // and default routers). Therefore, soliciting RAs from other routers on
+ // a link is unnecessary for routers.
+ if !e.protocol.Forwarding() {
+ e.mu.ndp.startSolicitingRouters()
+ }
+
+ return nil
+}
+
+// Enabled implements stack.NetworkEndpoint.
+func (e *endpoint) Enabled() bool {
+ return e.nic.Enabled() && e.isEnabled()
+}
+
+// isEnabled returns true if the endpoint is enabled, regardless of the
+// enabled status of the NIC.
+func (e *endpoint) isEnabled() bool {
+ return atomic.LoadUint32(&e.enabled) == 1
+}
+
+// setEnabled sets the enabled status for the endpoint.
+//
+// Returns true if the enabled status was updated.
+func (e *endpoint) setEnabled(v bool) bool {
+ if v {
+ return atomic.SwapUint32(&e.enabled, 1) == 0
+ }
+ return atomic.SwapUint32(&e.enabled, 0) == 1
+}
+
+// Disable implements stack.NetworkEndpoint.
+func (e *endpoint) Disable() {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ e.disableLocked()
+}
+
+func (e *endpoint) disableLocked() {
+ if !e.setEnabled(false) {
+ return
+ }
+
+ e.mu.ndp.stopSolicitingRouters()
+ e.mu.ndp.cleanupState(false /* hostOnly */)
+ e.stopDADForPermanentAddressesLocked()
+
+ // The endpoint may have already left the multicast group.
+ if _, err := e.mu.addressableEndpointState.LeaveGroup(header.IPv6AllNodesMulticastAddress); err != nil && err != tcpip.ErrBadLocalAddress {
+ panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv6AllNodesMulticastAddress, err))
+ }
+}
+
+// stopDADForPermanentAddressesLocked stops DAD for all permaneent addresses.
+//
+// Precondition: e.mu must be write locked.
+func (e *endpoint) stopDADForPermanentAddressesLocked() {
+ // Stop DAD for all the tentative unicast addresses.
+ e.mu.addressableEndpointState.ReadOnly().ForEach(func(addressEndpoint stack.AddressEndpoint) bool {
+ if addressEndpoint.GetKind() != stack.PermanentTentative {
+ return true
+ }
+
+ addr := addressEndpoint.AddressWithPrefix().Address
+ if header.IsV6UnicastAddress(addr) {
+ e.mu.ndp.stopDuplicateAddressDetection(addr)
+ }
+
+ return true
+ })
}
// DefaultTTL is the default hop limit for this endpoint.
@@ -66,16 +351,6 @@ func (e *endpoint) MTU() uint32 {
return calculateMTU(e.linkEP.MTU())
}
-// NICID returns the ID of the NIC this endpoint belongs to.
-func (e *endpoint) NICID() tcpip.NICID {
- return e.nicID
-}
-
-// Capabilities implements stack.NetworkEndpoint.Capabilities.
-func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities {
- return e.linkEP.Capabilities()
-}
-
// MaxHeaderLength returns the maximum length needed by ipv6 headers (and
// underlying protocols).
func (e *endpoint) MaxHeaderLength() uint16 {
@@ -110,8 +385,8 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
// iptables filtering. All packets that reach here are locally
// generated.
- nicName := e.stack.FindNICNameFromID(e.NICID())
- ipt := e.stack.IPTables()
+ nicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
+ ipt := e.protocol.stack.IPTables()
if ok := ipt.Check(stack.Output, pkt, gso, r, "", nicName); !ok {
// iptables is telling us to drop the packet.
r.Stats().IP.IPTablesOutputDropped.Increment()
@@ -127,7 +402,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
// short circuits broadcasts before they are sent out to other hosts.
if pkt.NatDone {
netHeader := header.IPv6(pkt.NetworkHeader().View())
- if ep, err := e.stack.FindNetworkEndpoint(header.IPv6ProtocolNumber, netHeader.DestinationAddress()); err == nil {
+ if ep, err := e.protocol.stack.FindNetworkEndpoint(header.IPv6ProtocolNumber, netHeader.DestinationAddress()); err == nil {
route := r.ReverseRoute(netHeader.SourceAddress(), netHeader.DestinationAddress())
ep.HandlePacket(&route, pkt)
return nil
@@ -170,8 +445,8 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
// iptables filtering. All packets that reach here are locally
// generated.
- nicName := e.stack.FindNICNameFromID(e.NICID())
- ipt := e.stack.IPTables()
+ nicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
+ ipt := e.protocol.stack.IPTables()
dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r, nicName)
if len(dropped) == 0 && len(natPkts) == 0 {
// Fast path: If no packets are to be dropped then we can just invoke the
@@ -191,7 +466,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
}
if _, ok := natPkts[pkt]; ok {
netHeader := header.IPv6(pkt.NetworkHeader().View())
- if ep, err := e.stack.FindNetworkEndpoint(header.IPv6ProtocolNumber, netHeader.DestinationAddress()); err == nil {
+ if ep, err := e.protocol.stack.FindNetworkEndpoint(header.IPv6ProtocolNumber, netHeader.DestinationAddress()); err == nil {
src := netHeader.SourceAddress()
dst := netHeader.DestinationAddress()
route := r.ReverseRoute(src, dst)
@@ -224,6 +499,10 @@ func (*endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuff
// HandlePacket is called by the link layer when new ipv6 packets arrive for
// this endpoint.
func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
+ if !e.isEnabled() {
+ return
+ }
+
h := header.IPv6(pkt.NetworkHeader().View())
if !h.IsValid(pkt.Data.Size() + pkt.NetworkHeader().View().Size() + pkt.TransportHeader().View().Size()) {
r.Stats().IP.MalformedPacketsReceived.Increment()
@@ -241,8 +520,8 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
hasFragmentHeader := false
// iptables filtering. All packets that reach here are intended for
- // this machine and will not be forwarded.
- ipt := e.stack.IPTables()
+ // this machine and need not be forwarded.
+ ipt := e.protocol.stack.IPTables()
if ok := ipt.Check(stack.Input, pkt, nil, nil, "", ""); !ok {
// iptables is telling us to drop the packet.
r.Stats().IP.IPTablesInputDropped.Increment()
@@ -482,7 +761,18 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
r.Stats().IP.PacketsDelivered.Increment()
// TODO(b/152019344): Send an ICMPv6 Parameter Problem, Code 1 error
// in response to unrecognized next header values.
- e.dispatcher.DeliverTransportPacket(r, p, pkt)
+ switch res := e.dispatcher.DeliverTransportPacket(r, p, pkt); res {
+ case stack.TransportPacketHandled:
+ case stack.TransportPacketDestinationPortUnreachable:
+ // As per RFC 4443 section 3.1:
+ // A destination node SHOULD originate a Destination Unreachable
+ // message with Code 4 in response to a packet for which the
+ // transport protocol (e.g., UDP) has no listener, if that transport
+ // protocol has no alternative means to inform the sender.
+ _ = returnError(r, &icmpReasonPortUnreachable{}, pkt)
+ default:
+ panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
+ }
}
default:
@@ -498,19 +788,333 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
}
// Close cleans up resources associated with the endpoint.
-func (*endpoint) Close() {}
+func (e *endpoint) Close() {
+ e.mu.Lock()
+ e.disableLocked()
+ e.mu.ndp.removeSLAACAddresses(false /* keepLinkLocal */)
+ e.stopDADForPermanentAddressesLocked()
+ e.mu.addressableEndpointState.Cleanup()
+ e.mu.Unlock()
+
+ e.protocol.forgetEndpoint(e)
+}
// NetworkProtocolNumber implements stack.NetworkEndpoint.NetworkProtocolNumber.
func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
return e.protocol.Number()
}
+// AddAndAcquirePermanentAddress implements stack.AddressableEndpoint.
+func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, *tcpip.Error) {
+ // TODO(b/169350103): add checks here after making sure we no longer receive
+ // an empty address.
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ return e.addAndAcquirePermanentAddressLocked(addr, peb, configType, deprecated)
+}
+
+// addAndAcquirePermanentAddressLocked is like AddAndAcquirePermanentAddress but
+// with locking requirements.
+//
+// addAndAcquirePermanentAddressLocked also joins the passed address's
+// solicited-node multicast group and start duplicate address detection.
+//
+// Precondition: e.mu must be write locked.
+func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, *tcpip.Error) {
+ addressEndpoint, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, peb, configType, deprecated)
+ if err != nil {
+ return nil, err
+ }
+
+ if !header.IsV6UnicastAddress(addr.Address) {
+ return addressEndpoint, nil
+ }
+
+ snmc := header.SolicitedNodeAddr(addr.Address)
+ if _, err := e.mu.addressableEndpointState.JoinGroup(snmc); err != nil {
+ return nil, err
+ }
+
+ addressEndpoint.SetKind(stack.PermanentTentative)
+
+ if e.Enabled() {
+ if err := e.mu.ndp.startDuplicateAddressDetection(addr.Address, addressEndpoint); err != nil {
+ return nil, err
+ }
+ }
+
+ return addressEndpoint, nil
+}
+
+// RemovePermanentAddress implements stack.AddressableEndpoint.
+func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) *tcpip.Error {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+
+ addressEndpoint := e.getAddressRLocked(addr)
+ if addressEndpoint == nil || !addressEndpoint.GetKind().IsPermanent() {
+ return tcpip.ErrBadLocalAddress
+ }
+
+ return e.removePermanentEndpointLocked(addressEndpoint, true)
+}
+
+// removePermanentEndpointLocked is like removePermanentAddressLocked except
+// it works with a stack.AddressEndpoint.
+//
+// Precondition: e.mu must be write locked.
+func (e *endpoint) removePermanentEndpointLocked(addressEndpoint stack.AddressEndpoint, allowSLAACInvalidation bool) *tcpip.Error {
+ addr := addressEndpoint.AddressWithPrefix()
+ unicast := header.IsV6UnicastAddress(addr.Address)
+ if unicast {
+ e.mu.ndp.stopDuplicateAddressDetection(addr.Address)
+
+ // If we are removing an address generated via SLAAC, cleanup
+ // its SLAAC resources and notify the integrator.
+ switch addressEndpoint.ConfigType() {
+ case stack.AddressConfigSlaac:
+ e.mu.ndp.cleanupSLAACAddrResourcesAndNotify(addr, allowSLAACInvalidation)
+ case stack.AddressConfigSlaacTemp:
+ e.mu.ndp.cleanupTempSLAACAddrResourcesAndNotify(addr, allowSLAACInvalidation)
+ }
+ }
+
+ if err := e.mu.addressableEndpointState.RemovePermanentEndpoint(addressEndpoint); err != nil {
+ return err
+ }
+
+ if !unicast {
+ return nil
+ }
+
+ snmc := header.SolicitedNodeAddr(addr.Address)
+ if _, err := e.mu.addressableEndpointState.LeaveGroup(snmc); err != nil && err != tcpip.ErrBadLocalAddress {
+ return err
+ }
+
+ return nil
+}
+
+// hasPermanentAddressLocked returns true if the endpoint has a permanent
+// address equal to the passed address.
+//
+// Precondition: e.mu must be read or write locked.
+func (e *endpoint) hasPermanentAddressRLocked(addr tcpip.Address) bool {
+ addressEndpoint := e.getAddressRLocked(addr)
+ if addressEndpoint == nil {
+ return false
+ }
+ return addressEndpoint.GetKind().IsPermanent()
+}
+
+// getAddressRLocked returns the endpoint for the passed address.
+//
+// Precondition: e.mu must be read or write locked.
+func (e *endpoint) getAddressRLocked(localAddr tcpip.Address) stack.AddressEndpoint {
+ return e.mu.addressableEndpointState.ReadOnly().Lookup(localAddr)
+}
+
+// AcquireAssignedAddress implements stack.AddressableEndpoint.
+func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ return e.acquireAddressOrCreateTempLocked(localAddr, allowTemp, tempPEB)
+}
+
+// acquireAddressOrCreateTempLocked is like AcquireAssignedAddress but with
+// locking requirements.
+//
+// Precondition: e.mu must be write locked.
+func (e *endpoint) acquireAddressOrCreateTempLocked(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint {
+ return e.mu.addressableEndpointState.AcquireAssignedAddress(localAddr, allowTemp, tempPEB)
+}
+
+// AcquirePrimaryAddress implements stack.AddressableEndpoint.
+func (e *endpoint) AcquirePrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
+ e.mu.RLock()
+ defer e.mu.RUnlock()
+ return e.acquirePrimaryAddressRLocked(remoteAddr, allowExpired)
+}
+
+// acquirePrimaryAddressRLocked is like AcquirePrimaryAddress but with locking
+// requirements.
+//
+// Precondition: e.mu must be read locked.
+func (e *endpoint) acquirePrimaryAddressRLocked(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
+ // addrCandidate is a candidate for Source Address Selection, as per
+ // RFC 6724 section 5.
+ type addrCandidate struct {
+ addressEndpoint stack.AddressEndpoint
+ scope header.IPv6AddressScope
+ }
+
+ if len(remoteAddr) == 0 {
+ return e.mu.addressableEndpointState.AcquirePrimaryAddress(remoteAddr, allowExpired)
+ }
+
+ // Create a candidate set of available addresses we can potentially use as a
+ // source address.
+ var cs []addrCandidate
+ e.mu.addressableEndpointState.ReadOnly().ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) {
+ // If r is not valid for outgoing connections, it is not a valid endpoint.
+ if !addressEndpoint.IsAssigned(allowExpired) {
+ return
+ }
+
+ addr := addressEndpoint.AddressWithPrefix().Address
+ scope, err := header.ScopeForIPv6Address(addr)
+ if err != nil {
+ // Should never happen as we got r from the primary IPv6 endpoint list and
+ // ScopeForIPv6Address only returns an error if addr is not an IPv6
+ // address.
+ panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", addr, err))
+ }
+
+ cs = append(cs, addrCandidate{
+ addressEndpoint: addressEndpoint,
+ scope: scope,
+ })
+ })
+
+ remoteScope, err := header.ScopeForIPv6Address(remoteAddr)
+ if err != nil {
+ // primaryIPv6Endpoint should never be called with an invalid IPv6 address.
+ panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", remoteAddr, err))
+ }
+
+ // Sort the addresses as per RFC 6724 section 5 rules 1-3.
+ //
+ // TODO(b/146021396): Implement rules 4-8 of RFC 6724 section 5.
+ sort.Slice(cs, func(i, j int) bool {
+ sa := cs[i]
+ sb := cs[j]
+
+ // Prefer same address as per RFC 6724 section 5 rule 1.
+ if sa.addressEndpoint.AddressWithPrefix().Address == remoteAddr {
+ return true
+ }
+ if sb.addressEndpoint.AddressWithPrefix().Address == remoteAddr {
+ return false
+ }
+
+ // Prefer appropriate scope as per RFC 6724 section 5 rule 2.
+ if sa.scope < sb.scope {
+ return sa.scope >= remoteScope
+ } else if sb.scope < sa.scope {
+ return sb.scope < remoteScope
+ }
+
+ // Avoid deprecated addresses as per RFC 6724 section 5 rule 3.
+ if saDep, sbDep := sa.addressEndpoint.Deprecated(), sb.addressEndpoint.Deprecated(); saDep != sbDep {
+ // If sa is not deprecated, it is preferred over sb.
+ return sbDep
+ }
+
+ // Prefer temporary addresses as per RFC 6724 section 5 rule 7.
+ if saTemp, sbTemp := sa.addressEndpoint.ConfigType() == stack.AddressConfigSlaacTemp, sb.addressEndpoint.ConfigType() == stack.AddressConfigSlaacTemp; saTemp != sbTemp {
+ return saTemp
+ }
+
+ // sa and sb are equal, return the endpoint that is closest to the front of
+ // the primary endpoint list.
+ return i < j
+ })
+
+ // Return the most preferred address that can have its reference count
+ // incremented.
+ for _, c := range cs {
+ if c.addressEndpoint.IncRef() {
+ return c.addressEndpoint
+ }
+ }
+
+ return nil
+}
+
+// PrimaryAddresses implements stack.AddressableEndpoint.
+func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix {
+ e.mu.RLock()
+ defer e.mu.RUnlock()
+ return e.mu.addressableEndpointState.PrimaryAddresses()
+}
+
+// PermanentAddresses implements stack.AddressableEndpoint.
+func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix {
+ e.mu.RLock()
+ defer e.mu.RUnlock()
+ return e.mu.addressableEndpointState.PermanentAddresses()
+}
+
+// JoinGroup implements stack.GroupAddressableEndpoint.
+func (e *endpoint) JoinGroup(addr tcpip.Address) (bool, *tcpip.Error) {
+ if !header.IsV6MulticastAddress(addr) {
+ return false, tcpip.ErrBadAddress
+ }
+
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ return e.mu.addressableEndpointState.JoinGroup(addr)
+}
+
+// LeaveGroup implements stack.GroupAddressableEndpoint.
+func (e *endpoint) LeaveGroup(addr tcpip.Address) (bool, *tcpip.Error) {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+ return e.mu.addressableEndpointState.LeaveGroup(addr)
+}
+
+// IsInGroup implements stack.GroupAddressableEndpoint.
+func (e *endpoint) IsInGroup(addr tcpip.Address) bool {
+ e.mu.RLock()
+ defer e.mu.RUnlock()
+ return e.mu.addressableEndpointState.IsInGroup(addr)
+}
+
+var _ stack.ForwardingNetworkProtocol = (*protocol)(nil)
+var _ stack.NetworkProtocol = (*protocol)(nil)
+
type protocol struct {
+ stack *stack.Stack
+
+ mu struct {
+ sync.RWMutex
+
+ eps map[*endpoint]struct{}
+ }
+
// defaultTTL is the current default TTL for the protocol. Only the
- // uint8 portion of it is meaningful and it must be accessed
- // atomically.
- defaultTTL uint32
+ // uint8 portion of it is meaningful.
+ //
+ // Must be accessed using atomic operations.
+ defaultTTL uint32
+
+ // forwarding is set to 1 when the protocol has forwarding enabled and 0
+ // when it is disabled.
+ //
+ // Must be accessed using atomic operations.
+ forwarding uint32
+
fragmentation *fragmentation.Fragmentation
+
+ // ndpDisp is the NDP event dispatcher that is used to send the netstack
+ // integrator NDP related events.
+ ndpDisp NDPDispatcher
+
+ // ndpConfigs is the default NDP configurations used by an IPv6 endpoint.
+ ndpConfigs NDPConfigurations
+
+ // opaqueIIDOpts hold the options for generating opaque interface identifiers
+ // (IIDs) as outlined by RFC 7217.
+ opaqueIIDOpts OpaqueInterfaceIdentifierOptions
+
+ // tempIIDSeed is used to seed the initial temporary interface identifier
+ // history value used to generate IIDs for temporary SLAAC addresses.
+ tempIIDSeed []byte
+
+ // autoGenIPv6LinkLocal determines whether or not the stack attempts to
+ // auto-generate an IPv6 link-local address for newly enabled non-loopback
+ // NICs. See the AutoGenIPv6LinkLocal field of Options for more details.
+ autoGenIPv6LinkLocal bool
}
// Number returns the ipv6 protocol number.
@@ -535,16 +1139,36 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
}
// NewEndpoint creates a new ipv6 endpoint.
-func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
- return &endpoint{
- nicID: nicID,
- linkEP: linkEP,
+func (p *protocol) NewEndpoint(nic stack.NetworkInterface, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
+ e := &endpoint{
+ nic: nic,
+ linkEP: nic.LinkEndpoint(),
linkAddrCache: linkAddrCache,
nud: nud,
dispatcher: dispatcher,
protocol: p,
- stack: st,
}
+ e.mu.addressableEndpointState.Init(e)
+ e.mu.ndp = ndpState{
+ ep: e,
+ configs: p.ndpConfigs,
+ dad: make(map[tcpip.Address]dadState),
+ defaultRouters: make(map[tcpip.Address]defaultRouterState),
+ onLinkPrefixes: make(map[tcpip.Subnet]onLinkPrefixState),
+ slaacPrefixes: make(map[tcpip.Subnet]slaacPrefixState),
+ }
+ e.mu.ndp.initializeTempAddrState()
+
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ p.mu.eps[e] = struct{}{}
+ return e
+}
+
+func (p *protocol) forgetEndpoint(e *endpoint) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ delete(p.mu.eps, e)
}
// SetOption implements NetworkProtocol.SetOption.
@@ -595,6 +1219,35 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNu
return proto, !fragMore && fragOffset == 0, true
}
+// Forwarding implements stack.ForwardingNetworkProtocol.
+func (p *protocol) Forwarding() bool {
+ return uint8(atomic.LoadUint32(&p.forwarding)) == 1
+}
+
+// setForwarding sets the forwarding status for the protocol.
+//
+// Returns true if the forwarding status was updated.
+func (p *protocol) setForwarding(v bool) bool {
+ if v {
+ return atomic.SwapUint32(&p.forwarding, 1) == 0
+ }
+ return atomic.SwapUint32(&p.forwarding, 0) == 1
+}
+
+// SetForwarding implements stack.ForwardingNetworkProtocol.
+func (p *protocol) SetForwarding(v bool) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ if !p.setForwarding(v) {
+ return
+ }
+
+ for ep := range p.mu.eps {
+ ep.transitionForwarding(v)
+ }
+}
+
// calculateMTU calculates the network-layer payload MTU based on the link-layer
// payload mtu.
func calculateMTU(mtu uint32) uint32 {
@@ -605,10 +1258,69 @@ func calculateMTU(mtu uint32) uint32 {
return maxPayloadSize
}
-// NewProtocol returns an IPv6 network protocol.
-func NewProtocol() stack.NetworkProtocol {
- return &protocol{
- defaultTTL: DefaultTTL,
- fragmentation: fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout),
+// Options holds options to configure a new protocol.
+type Options struct {
+ // NDPConfigs is the default NDP configurations used by interfaces.
+ NDPConfigs NDPConfigurations
+
+ // AutoGenIPv6LinkLocal determines whether or not the stack attempts to
+ // auto-generate an IPv6 link-local address for newly enabled non-loopback
+ // NICs.
+ //
+ // Note, setting this to true does not mean that a link-local address is
+ // assigned right away, or at all. If Duplicate Address Detection is enabled,
+ // an address is only assigned if it successfully resolves. If it fails, no
+ // further attempts are made to auto-generate an IPv6 link-local adddress.
+ //
+ // The generated link-local address follows RFC 4291 Appendix A guidelines.
+ AutoGenIPv6LinkLocal bool
+
+ // NDPDisp is the NDP event dispatcher that an integrator can provide to
+ // receive NDP related events.
+ NDPDisp NDPDispatcher
+
+ // OpaqueIIDOpts hold the options for generating opaque interface
+ // identifiers (IIDs) as outlined by RFC 7217.
+ OpaqueIIDOpts OpaqueInterfaceIdentifierOptions
+
+ // TempIIDSeed is used to seed the initial temporary interface identifier
+ // history value used to generate IIDs for temporary SLAAC addresses.
+ //
+ // Temporary SLAAC adresses are short-lived addresses which are unpredictable
+ // and random from the perspective of other nodes on the network. It is
+ // recommended that the seed be a random byte buffer of at least
+ // header.IIDSize bytes to make sure that temporary SLAAC addresses are
+ // sufficiently random. It should follow minimum randomness requirements for
+ // security as outlined by RFC 4086.
+ //
+ // Note: using a nil value, the same seed across netstack program runs, or a
+ // seed that is too small would reduce randomness and increase predictability,
+ // defeating the purpose of temporary SLAAC addresses.
+ TempIIDSeed []byte
+}
+
+// NewProtocolWithOptions returns an IPv6 network protocol.
+func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory {
+ opts.NDPConfigs.validate()
+
+ return func(s *stack.Stack) stack.NetworkProtocol {
+ p := &protocol{
+ stack: s,
+ fragmentation: fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout, s.Clock()),
+
+ ndpDisp: opts.NDPDisp,
+ ndpConfigs: opts.NDPConfigs,
+ opaqueIIDOpts: opts.OpaqueIIDOpts,
+ tempIIDSeed: opts.TempIIDSeed,
+ autoGenIPv6LinkLocal: opts.AutoGenIPv6LinkLocal,
+ }
+ p.mu.eps = make(map[*endpoint]struct{})
+ p.SetDefaultTTL(DefaultTTL)
+ return p
}
}
+
+// NewProtocol is equivalent to NewProtocolWithOptions with an empty Options.
+func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
+ return NewProtocolWithOptions(Options{})(s)
+}
diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go
index 7d138dadb..d85b5c00f 100644
--- a/pkg/tcpip/network/ipv6/ipv6_test.go
+++ b/pkg/tcpip/network/ipv6/ipv6_test.go
@@ -141,18 +141,18 @@ func testReceiveUDP(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst
func TestReceiveOnAllNodesMulticastAddr(t *testing.T) {
tests := []struct {
name string
- protocolFactory stack.TransportProtocol
+ protocolFactory stack.TransportProtocolFactory
rxf func(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst tcpip.Address, want uint64)
}{
- {"ICMP", icmp.NewProtocol6(), testReceiveICMP},
- {"UDP", udp.NewProtocol(), testReceiveUDP},
+ {"ICMP", icmp.NewProtocol6, testReceiveICMP},
+ {"UDP", udp.NewProtocol, testReceiveUDP},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{test.protocolFactory},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{test.protocolFactory},
})
e := channel.New(10, 1280, linkAddr1)
if err := s.CreateNIC(1, e); err != nil {
@@ -174,11 +174,11 @@ func TestReceiveOnSolicitedNodeAddr(t *testing.T) {
tests := []struct {
name string
- protocolFactory stack.TransportProtocol
+ protocolFactory stack.TransportProtocolFactory
rxf func(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst tcpip.Address, want uint64)
}{
- {"ICMP", icmp.NewProtocol6(), testReceiveICMP},
- {"UDP", udp.NewProtocol(), testReceiveUDP},
+ {"ICMP", icmp.NewProtocol6, testReceiveICMP},
+ {"UDP", udp.NewProtocol, testReceiveUDP},
}
snmc := header.SolicitedNodeAddr(addr2)
@@ -186,8 +186,8 @@ func TestReceiveOnSolicitedNodeAddr(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{test.protocolFactory},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{test.protocolFactory},
})
e := channel.New(1, 1280, linkAddr1)
if err := s.CreateNIC(nicID, e); err != nil {
@@ -273,7 +273,7 @@ func TestAddIpv6Address(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
})
if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil {
t.Fatalf("CreateNIC(_) = %s", err)
@@ -579,8 +579,8 @@ func TestReceiveIPv6ExtHdrs(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
e := channel.New(0, 1280, linkAddr1)
if err := s.CreateNIC(nicID, e); err != nil {
@@ -1549,8 +1549,8 @@ func TestReceiveIPv6Fragments(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
e := channel.New(0, 1280, linkAddr1)
if err := s.CreateNIC(nicID, e); err != nil {
@@ -1668,8 +1668,8 @@ func TestInvalidIPv6Fragments(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{
- NewProtocol(),
+ NetworkProtocols: []stack.NetworkProtocolFactory{
+ NewProtocol,
},
})
e := channel.New(0, 1500, linkAddr1)
@@ -1847,7 +1847,7 @@ func TestWriteStats(t *testing.T) {
func buildRoute(t *testing.T, ep stack.LinkEndpoint) stack.Route {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
})
if err := s.CreateNIC(1, ep); err != nil {
t.Fatalf("CreateNIC(1, _) failed: %s", err)
@@ -1895,3 +1895,30 @@ func (lm *limitedMatcher) Match(stack.Hook, *stack.PacketBuffer, string) (bool,
lm.limit--
return false, false
}
+
+func TestClearEndpointFromProtocolOnClose(t *testing.T) {
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ })
+ proto := s.NetworkProtocolInstance(ProtocolNumber).(*protocol)
+ ep := proto.NewEndpoint(&testInterface{}, nil, nil, nil).(*endpoint)
+ {
+ proto.mu.Lock()
+ _, hasEP := proto.mu.eps[ep]
+ proto.mu.Unlock()
+ if !hasEP {
+ t.Fatalf("expected protocol to have ep = %p in set of endpoints", ep)
+ }
+ }
+
+ ep.Close()
+
+ {
+ proto.mu.Lock()
+ _, hasEP := proto.mu.eps[ep]
+ proto.mu.Unlock()
+ if hasEP {
+ t.Fatalf("unexpectedly found ep = %p in set of protocol's endpoints", ep)
+ }
+ }
+}
diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/network/ipv6/ndp.go
index 97ca00d16..84c082852 100644
--- a/pkg/tcpip/stack/ndp.go
+++ b/pkg/tcpip/network/ipv6/ndp.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package stack
+package ipv6
import (
"fmt"
@@ -23,9 +23,27 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
const (
+ // defaultRetransmitTimer is the default amount of time to wait between
+ // sending reachability probes.
+ //
+ // Default taken from RETRANS_TIMER of RFC 4861 section 10.
+ defaultRetransmitTimer = time.Second
+
+ // minimumRetransmitTimer is the minimum amount of time to wait between
+ // sending reachability probes.
+ //
+ // Note, RFC 4861 does not impose a minimum Retransmit Timer, but we do here
+ // to make sure the messages are not sent all at once. We also come to this
+ // value because in the RetransmitTimer field of a Router Advertisement, a
+ // value of 0 means unspecified, so the smallest valid value is 1. Note, the
+ // unit of the RetransmitTimer field in the Router Advertisement is
+ // milliseconds.
+ minimumRetransmitTimer = time.Millisecond
+
// defaultDupAddrDetectTransmits is the default number of NDP Neighbor
// Solicitation messages to send when doing Duplicate Address Detection
// for a tentative address.
@@ -34,7 +52,7 @@ const (
defaultDupAddrDetectTransmits = 1
// defaultMaxRtrSolicitations is the default number of Router
- // Solicitation messages to send when a NIC becomes enabled.
+ // Solicitation messages to send when an IPv6 endpoint becomes enabled.
//
// Default = 3 (from RFC 4861 section 10).
defaultMaxRtrSolicitations = 3
@@ -131,7 +149,7 @@ const (
minRegenAdvanceDuration = time.Duration(0)
// maxSLAACAddrLocalRegenAttempts is the maximum number of times to attempt
- // SLAAC address regenerations in response to a NIC-local conflict.
+ // SLAAC address regenerations in response to an IPv6 endpoint-local conflict.
maxSLAACAddrLocalRegenAttempts = 10
)
@@ -163,7 +181,7 @@ var (
// This is exported as a variable (instead of a constant) so tests
// can update it to a smaller value.
//
- // This value guarantees that a temporary address will be preferred for at
+ // This value guarantees that a temporary address is preferred for at
// least 1hr if the SLAAC prefix is valid for at least that time.
MinMaxTempAddrPreferredLifetime = defaultRegenAdvanceDuration + MaxDesyncFactor + time.Hour
@@ -173,11 +191,17 @@ var (
// This is exported as a variable (instead of a constant) so tests
// can update it to a smaller value.
//
- // This value guarantees that a temporary address will be valid for at least
+ // This value guarantees that a temporary address is valid for at least
// 2hrs if the SLAAC prefix is valid for at least that time.
MinMaxTempAddrValidLifetime = 2 * time.Hour
)
+// NDPEndpoint is an endpoint that supports NDP.
+type NDPEndpoint interface {
+ // SetNDPConfigurations sets the NDP configurations.
+ SetNDPConfigurations(NDPConfigurations)
+}
+
// DHCPv6ConfigurationFromNDPRA is a configuration available via DHCPv6 that an
// NDP Router Advertisement informed the Stack about.
type DHCPv6ConfigurationFromNDPRA int
@@ -192,7 +216,7 @@ const (
// DHCPv6ManagedAddress indicates that addresses are available via DHCPv6.
//
// DHCPv6ManagedAddress also implies DHCPv6OtherConfigurations because DHCPv6
- // will return all available configuration information.
+ // returns all available configuration information when serving addresses.
DHCPv6ManagedAddress
// DHCPv6OtherConfigurations indicates that other configuration information is
@@ -207,19 +231,18 @@ const (
// NDPDispatcher is the interface integrators of netstack must implement to
// receive and handle NDP related events.
type NDPDispatcher interface {
- // OnDuplicateAddressDetectionStatus will be called when the DAD process
- // for an address (addr) on a NIC (with ID nicID) completes. resolved
- // will be set to true if DAD completed successfully (no duplicate addr
- // detected); false otherwise (addr was detected to be a duplicate on
- // the link the NIC is a part of, or it was stopped for some other
- // reason, such as the address being removed). If an error occured
- // during DAD, err will be set and resolved must be ignored.
+ // OnDuplicateAddressDetectionStatus is called when the DAD process for an
+ // address (addr) on a NIC (with ID nicID) completes. resolved is set to true
+ // if DAD completed successfully (no duplicate addr detected); false otherwise
+ // (addr was detected to be a duplicate on the link the NIC is a part of, or
+ // it was stopped for some other reason, such as the address being removed).
+ // If an error occured during DAD, err is set and resolved must be ignored.
//
// This function is not permitted to block indefinitely. This function
// is also not permitted to call into the stack.
OnDuplicateAddressDetectionStatus(nicID tcpip.NICID, addr tcpip.Address, resolved bool, err *tcpip.Error)
- // OnDefaultRouterDiscovered will be called when a new default router is
+ // OnDefaultRouterDiscovered is called when a new default router is
// discovered. Implementations must return true if the newly discovered
// router should be remembered.
//
@@ -227,56 +250,55 @@ type NDPDispatcher interface {
// is also not permitted to call into the stack.
OnDefaultRouterDiscovered(nicID tcpip.NICID, addr tcpip.Address) bool
- // OnDefaultRouterInvalidated will be called when a discovered default
- // router that was remembered is invalidated.
+ // OnDefaultRouterInvalidated is called when a discovered default router that
+ // was remembered is invalidated.
//
// This function is not permitted to block indefinitely. This function
// is also not permitted to call into the stack.
OnDefaultRouterInvalidated(nicID tcpip.NICID, addr tcpip.Address)
- // OnOnLinkPrefixDiscovered will be called when a new on-link prefix is
- // discovered. Implementations must return true if the newly discovered
- // on-link prefix should be remembered.
+ // OnOnLinkPrefixDiscovered is called when a new on-link prefix is discovered.
+ // Implementations must return true if the newly discovered on-link prefix
+ // should be remembered.
//
// This function is not permitted to block indefinitely. This function
// is also not permitted to call into the stack.
OnOnLinkPrefixDiscovered(nicID tcpip.NICID, prefix tcpip.Subnet) bool
- // OnOnLinkPrefixInvalidated will be called when a discovered on-link
- // prefix that was remembered is invalidated.
+ // OnOnLinkPrefixInvalidated is called when a discovered on-link prefix that
+ // was remembered is invalidated.
//
// This function is not permitted to block indefinitely. This function
// is also not permitted to call into the stack.
OnOnLinkPrefixInvalidated(nicID tcpip.NICID, prefix tcpip.Subnet)
- // OnAutoGenAddress will be called when a new prefix with its
- // autonomous address-configuration flag set has been received and SLAAC
- // has been performed. Implementations may prevent the stack from
- // assigning the address to the NIC by returning false.
+ // OnAutoGenAddress is called when a new prefix with its autonomous address-
+ // configuration flag set is received and SLAAC was performed. Implementations
+ // may prevent the stack from assigning the address to the NIC by returning
+ // false.
//
// This function is not permitted to block indefinitely. It must not
// call functions on the stack itself.
OnAutoGenAddress(tcpip.NICID, tcpip.AddressWithPrefix) bool
- // OnAutoGenAddressDeprecated will be called when an auto-generated
- // address (as part of SLAAC) has been deprecated, but is still
- // considered valid. Note, if an address is invalidated at the same
- // time it is deprecated, the deprecation event MAY be omitted.
+ // OnAutoGenAddressDeprecated is called when an auto-generated address (SLAAC)
+ // is deprecated, but is still considered valid. Note, if an address is
+ // invalidated at the same ime it is deprecated, the deprecation event may not
+ // be received.
//
// This function is not permitted to block indefinitely. It must not
// call functions on the stack itself.
OnAutoGenAddressDeprecated(tcpip.NICID, tcpip.AddressWithPrefix)
- // OnAutoGenAddressInvalidated will be called when an auto-generated
- // address (as part of SLAAC) has been invalidated.
+ // OnAutoGenAddressInvalidated is called when an auto-generated address
+ // (SLAAC) is invalidated.
//
// This function is not permitted to block indefinitely. It must not
// call functions on the stack itself.
OnAutoGenAddressInvalidated(tcpip.NICID, tcpip.AddressWithPrefix)
- // OnRecursiveDNSServerOption will be called when an NDP option with
- // recursive DNS servers has been received. Note, addrs may contain
- // link-local addresses.
+ // OnRecursiveDNSServerOption is called when the stack learns of DNS servers
+ // through NDP. Note, the addresses may contain link-local addresses.
//
// It is up to the caller to use the DNS Servers only for their valid
// lifetime. OnRecursiveDNSServerOption may be called for new or
@@ -288,8 +310,8 @@ type NDPDispatcher interface {
// call functions on the stack itself.
OnRecursiveDNSServerOption(nicID tcpip.NICID, addrs []tcpip.Address, lifetime time.Duration)
- // OnDNSSearchListOption will be called when an NDP option with a DNS
- // search list has been received.
+ // OnDNSSearchListOption is called when the stack learns of DNS search lists
+ // through NDP.
//
// It is up to the caller to use the domain names in the search list
// for only their valid lifetime. OnDNSSearchListOption may be called
@@ -298,8 +320,8 @@ type NDPDispatcher interface {
// be increased, decreased or completely invalidated when lifetime = 0.
OnDNSSearchListOption(nicID tcpip.NICID, domainNames []string, lifetime time.Duration)
- // OnDHCPv6Configuration will be called with an updated configuration that is
- // available via DHCPv6 for a specified NIC.
+ // OnDHCPv6Configuration is called with an updated configuration that is
+ // available via DHCPv6 for the passed NIC.
//
// This function is not permitted to block indefinitely. It must not
// call functions on the stack itself.
@@ -320,7 +342,7 @@ type NDPConfigurations struct {
// Must be greater than or equal to 1ms.
RetransmitTimer time.Duration
- // The number of Router Solicitation messages to send when the NIC
+ // The number of Router Solicitation messages to send when the IPv6 endpoint
// becomes enabled.
MaxRtrSolicitations uint8
@@ -335,24 +357,22 @@ type NDPConfigurations struct {
// Must be greater than or equal to 0s.
MaxRtrSolicitationDelay time.Duration
- // HandleRAs determines whether or not Router Advertisements will be
- // processed.
+ // HandleRAs determines whether or not Router Advertisements are processed.
HandleRAs bool
- // DiscoverDefaultRouters determines whether or not default routers will
- // be discovered from Router Advertisements. This configuration is
- // ignored if HandleRAs is false.
+ // DiscoverDefaultRouters determines whether or not default routers are
+ // discovered from Router Advertisements, as per RFC 4861 section 6. This
+ // configuration is ignored if HandleRAs is false.
DiscoverDefaultRouters bool
- // DiscoverOnLinkPrefixes determines whether or not on-link prefixes
- // will be discovered from Router Advertisements' Prefix Information
- // option. This configuration is ignored if HandleRAs is false.
+ // DiscoverOnLinkPrefixes determines whether or not on-link prefixes are
+ // discovered from Router Advertisements' Prefix Information option, as per
+ // RFC 4861 section 6. This configuration is ignored if HandleRAs is false.
DiscoverOnLinkPrefixes bool
- // AutoGenGlobalAddresses determines whether or not global IPv6
- // addresses will be generated for a NIC in response to receiving a new
- // Prefix Information option with its Autonomous Address
- // AutoConfiguration flag set, as a host, as per RFC 4862 (SLAAC).
+ // AutoGenGlobalAddresses determines whether or not an IPv6 endpoint performs
+ // SLAAC to auto-generate global SLAAC addresses in response to Prefix
+ // Information options, as per RFC 4862.
//
// Note, if an address was already generated for some unique prefix, as
// part of SLAAC, this option does not affect whether or not the
@@ -366,12 +386,12 @@ type NDPConfigurations struct {
//
// If the method used to generate the address does not support creating
// alternative addresses (e.g. IIDs based on the modified EUI64 of a NIC's
- // MAC address), then no attempt will be made to resolve the conflict.
+ // MAC address), then no attempt is made to resolve the conflict.
AutoGenAddressConflictRetries uint8
// AutoGenTempGlobalAddresses determines whether or not temporary SLAAC
- // addresses will be generated for a NIC as part of SLAAC privacy extensions,
- // RFC 4941.
+ // addresses are generated for an IPv6 endpoint as part of SLAAC privacy
+ // extensions, as per RFC 4941.
//
// Ignored if AutoGenGlobalAddresses is false.
AutoGenTempGlobalAddresses bool
@@ -410,7 +430,7 @@ func DefaultNDPConfigurations() NDPConfigurations {
}
// validate modifies an NDPConfigurations with valid values. If invalid values
-// are present in c, the corresponding default values will be used instead.
+// are present in c, the corresponding default values are used instead.
func (c *NDPConfigurations) validate() {
if c.RetransmitTimer < minimumRetransmitTimer {
c.RetransmitTimer = defaultRetransmitTimer
@@ -439,8 +459,8 @@ func (c *NDPConfigurations) validate() {
// ndpState is the per-interface NDP state.
type ndpState struct {
- // The NIC this ndpState is for.
- nic *NIC
+ // The IPv6 endpoint this ndpState is for.
+ ep *endpoint
// configs is the per-interface NDP configurations.
configs NDPConfigurations
@@ -458,8 +478,8 @@ type ndpState struct {
// Used to let the Router Solicitation timer know that it has been stopped.
//
// Must only be read from or written to while protected by the lock of
- // the NIC this ndpState is associated with. MUST be set when the timer is
- // set.
+ // the IPv6 endpoint this ndpState is associated with. MUST be set when the
+ // timer is set.
done *bool
}
@@ -492,7 +512,7 @@ type dadState struct {
// Used to let the DAD timer know that it has been stopped.
//
// Must only be read from or written to while protected by the lock of
- // the NIC this dadState is associated with.
+ // the IPv6 endpoint this dadState is associated with.
done *bool
}
@@ -537,7 +557,7 @@ type tempSLAACAddrState struct {
// The address's endpoint.
//
// Must not be nil.
- ref *referencedNetworkEndpoint
+ addressEndpoint stack.AddressEndpoint
// Has a new temporary SLAAC address already been regenerated?
regenerated bool
@@ -567,10 +587,10 @@ type slaacPrefixState struct {
//
// May only be nil when the address is being (re-)generated. Otherwise,
// must not be nil as all SLAAC prefixes must have a stable address.
- ref *referencedNetworkEndpoint
+ addressEndpoint stack.AddressEndpoint
- // The number of times an address has been generated locally where the NIC
- // already had the generated address.
+ // The number of times an address has been generated locally where the IPv6
+ // endpoint already had the generated address.
localGenerationFailures uint8
}
@@ -578,11 +598,12 @@ type slaacPrefixState struct {
tempAddrs map[tcpip.Address]tempSLAACAddrState
// The next two fields are used by both stable and temporary addresses
- // generated for a SLAAC prefix. This is safe as only 1 address will be
- // in the generation and DAD process at any time. That is, no two addresses
- // will be generated at the same time for a given SLAAC prefix.
+ // generated for a SLAAC prefix. This is safe as only 1 address is in the
+ // generation and DAD process at any time. That is, no two addresses are
+ // generated at the same time for a given SLAAC prefix.
- // The number of times an address has been generated and added to the NIC.
+ // The number of times an address has been generated and added to the IPv6
+ // endpoint.
//
// Addresses may be regenerated in reseponse to a DAD conflicts.
generationAttempts uint8
@@ -597,16 +618,16 @@ type slaacPrefixState struct {
// This function must only be called by IPv6 addresses that are currently
// tentative.
//
-// The NIC that ndp belongs to MUST be locked.
-func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, ref *referencedNetworkEndpoint) *tcpip.Error {
+// The IPv6 endpoint that ndp belongs to MUST be locked.
+func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, addressEndpoint stack.AddressEndpoint) *tcpip.Error {
// addr must be a valid unicast IPv6 address.
if !header.IsV6UnicastAddress(addr) {
return tcpip.ErrAddressFamilyNotSupported
}
- if ref.getKind() != permanentTentative {
+ if addressEndpoint.GetKind() != stack.PermanentTentative {
// The endpoint should be marked as tentative since we are starting DAD.
- panic(fmt.Sprintf("ndpdad: addr %s is not tentative on NIC(%d)", addr, ndp.nic.ID()))
+ panic(fmt.Sprintf("ndpdad: addr %s is not tentative on NIC(%d)", addr, ndp.ep.nic.ID()))
}
// Should not attempt to perform DAD on an address that is currently in the
@@ -617,18 +638,18 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, ref *ref
// existed, we would get an error since we attempted to add a duplicate
// address, or its reference count would have been increased without doing
// the work that would have been done for an address that was brand new.
- // See NIC.addAddressLocked.
- panic(fmt.Sprintf("ndpdad: already performing DAD for addr %s on NIC(%d)", addr, ndp.nic.ID()))
+ // See endpoint.addAddressLocked.
+ panic(fmt.Sprintf("ndpdad: already performing DAD for addr %s on NIC(%d)", addr, ndp.ep.nic.ID()))
}
remaining := ndp.configs.DupAddrDetectTransmits
if remaining == 0 {
- ref.setKind(permanent)
+ addressEndpoint.SetKind(stack.Permanent)
// Consider DAD to have resolved even if no DAD messages were actually
// transmitted.
- if ndpDisp := ndp.nic.stack.ndpDisp; ndpDisp != nil {
- ndpDisp.OnDuplicateAddressDetectionStatus(ndp.nic.ID(), addr, true, nil)
+ if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+ ndpDisp.OnDuplicateAddressDetectionStatus(ndp.ep.nic.ID(), addr, true, nil)
}
return nil
@@ -637,25 +658,25 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, ref *ref
var done bool
var timer tcpip.Timer
// We initially start a timer to fire immediately because some of the DAD work
- // cannot be done while holding the NIC's lock. This is effectively the same
- // as starting a goroutine but we use a timer that fires immediately so we can
- // reset it for the next DAD iteration.
- timer = ndp.nic.stack.Clock().AfterFunc(0, func() {
- ndp.nic.mu.Lock()
- defer ndp.nic.mu.Unlock()
+ // cannot be done while holding the IPv6 endpoint's lock. This is effectively
+ // the same as starting a goroutine but we use a timer that fires immediately
+ // so we can reset it for the next DAD iteration.
+ timer = ndp.ep.protocol.stack.Clock().AfterFunc(0, func() {
+ ndp.ep.mu.Lock()
+ defer ndp.ep.mu.Unlock()
if done {
// If we reach this point, it means that the DAD timer fired after
- // another goroutine already obtained the NIC lock and stopped DAD
- // before this function obtained the NIC lock. Simply return here and do
- // nothing further.
+ // another goroutine already obtained the IPv6 endpoint lock and stopped
+ // DAD before this function obtained the NIC lock. Simply return here and
+ // do nothing further.
return
}
- if ref.getKind() != permanentTentative {
+ if addressEndpoint.GetKind() != stack.PermanentTentative {
// The endpoint should still be marked as tentative since we are still
// performing DAD on it.
- panic(fmt.Sprintf("ndpdad: addr %s is no longer tentative on NIC(%d)", addr, ndp.nic.ID()))
+ panic(fmt.Sprintf("ndpdad: addr %s is no longer tentative on NIC(%d)", addr, ndp.ep.nic.ID()))
}
dadDone := remaining == 0
@@ -663,33 +684,34 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, ref *ref
var err *tcpip.Error
if !dadDone {
// Use the unspecified address as the source address when performing DAD.
- ref := ndp.nic.getRefOrCreateTempLocked(header.IPv6ProtocolNumber, header.IPv6Any, NeverPrimaryEndpoint)
+ addressEndpoint := ndp.ep.acquireAddressOrCreateTempLocked(header.IPv6Any, true /* createTemp */, stack.NeverPrimaryEndpoint)
// Do not hold the lock when sending packets which may be a long running
// task or may block link address resolution. We know this is safe
// because immediately after obtaining the lock again, we check if DAD
- // has been stopped before doing any work with the NIC. Note, DAD would be
- // stopped if the NIC was disabled or removed, or if the address was
- // removed.
- ndp.nic.mu.Unlock()
- err = ndp.sendDADPacket(addr, ref)
- ndp.nic.mu.Lock()
+ // has been stopped before doing any work with the IPv6 endpoint. Note,
+ // DAD would be stopped if the IPv6 endpoint was disabled or closed, or if
+ // the address was removed.
+ ndp.ep.mu.Unlock()
+ err = ndp.sendDADPacket(addr, addressEndpoint)
+ ndp.ep.mu.Lock()
+ addressEndpoint.DecRef()
}
if done {
// If we reach this point, it means that DAD was stopped after we released
- // the NIC's read lock and before we obtained the write lock.
+ // the IPv6 endpoint's read lock and before we obtained the write lock.
return
}
if dadDone {
// DAD has resolved.
- ref.setKind(permanent)
+ addressEndpoint.SetKind(stack.Permanent)
} else if err == nil {
// DAD is not done and we had no errors when sending the last NDP NS,
// schedule the next DAD timer.
remaining--
- timer.Reset(ndp.nic.stack.ndpConfigs.RetransmitTimer)
+ timer.Reset(ndp.configs.RetransmitTimer)
return
}
@@ -698,16 +720,16 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, ref *ref
// integrator know DAD has completed.
delete(ndp.dad, addr)
- if ndpDisp := ndp.nic.stack.ndpDisp; ndpDisp != nil {
- ndpDisp.OnDuplicateAddressDetectionStatus(ndp.nic.ID(), addr, dadDone, err)
+ if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+ ndpDisp.OnDuplicateAddressDetectionStatus(ndp.ep.nic.ID(), addr, dadDone, err)
}
// If DAD resolved for a stable SLAAC address, attempt generation of a
// temporary SLAAC address.
- if dadDone && ref.configType == slaac {
+ if dadDone && addressEndpoint.ConfigType() == stack.AddressConfigSlaac {
// Reset the generation attempts counter as we are starting the generation
// of a new address for the SLAAC prefix.
- ndp.regenerateTempSLAACAddr(ref.addrWithPrefix().Subnet(), true /* resetGenAttempts */)
+ ndp.regenerateTempSLAACAddr(addressEndpoint.AddressWithPrefix().Subnet(), true /* resetGenAttempts */)
}
})
@@ -722,28 +744,31 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, ref *ref
// sendDADPacket sends a NS message to see if any nodes on ndp's NIC's link owns
// addr.
//
-// addr must be a tentative IPv6 address on ndp's NIC.
+// addr must be a tentative IPv6 address on ndp's IPv6 endpoint.
//
-// The NIC ndp belongs to MUST NOT be locked.
-func (ndp *ndpState) sendDADPacket(addr tcpip.Address, ref *referencedNetworkEndpoint) *tcpip.Error {
+// The IPv6 endpoint that ndp belongs to MUST NOT be locked.
+func (ndp *ndpState) sendDADPacket(addr tcpip.Address, addressEndpoint stack.AddressEndpoint) *tcpip.Error {
snmc := header.SolicitedNodeAddr(addr)
- r := makeRoute(header.IPv6ProtocolNumber, ref.address(), snmc, ndp.nic.linkEP.LinkAddress(), ref, false, false)
+ r, err := ndp.ep.protocol.stack.FindRoute(ndp.ep.nic.ID(), header.IPv6Any, snmc, ProtocolNumber, false /* multicastLoop */)
+ if err != nil {
+ return err
+ }
defer r.Release()
// Route should resolve immediately since snmc is a multicast address so a
// remote link address can be calculated without a resolution process.
if c, err := r.Resolve(nil); err != nil {
// Do not consider the NIC being unknown or disabled as a fatal error.
- // Since this method is required to be called when the NIC is not locked,
- // the NIC could have been disabled or removed by another goroutine.
+ // Since this method is required to be called when the IPv6 endpoint is not
+ // locked, the NIC could have been disabled or removed by another goroutine.
if err == tcpip.ErrUnknownNICID || err != tcpip.ErrInvalidEndpointState {
return err
}
- panic(fmt.Sprintf("ndp: error when resolving route to send NDP NS for DAD (%s -> %s on NIC(%d)): %s", header.IPv6Any, snmc, ndp.nic.ID(), err))
+ panic(fmt.Sprintf("ndp: error when resolving route to send NDP NS for DAD (%s -> %s on NIC(%d)): %s", header.IPv6Any, snmc, ndp.ep.nic.ID(), err))
} else if c != nil {
- panic(fmt.Sprintf("ndp: route resolution not immediate for route to send NDP NS for DAD (%s -> %s on NIC(%d))", header.IPv6Any, snmc, ndp.nic.ID()))
+ panic(fmt.Sprintf("ndp: route resolution not immediate for route to send NDP NS for DAD (%s -> %s on NIC(%d))", header.IPv6Any, snmc, ndp.ep.nic.ID()))
}
icmpData := header.ICMPv6(buffer.NewView(header.ICMPv6NeighborSolicitMinimumSize))
@@ -752,17 +777,16 @@ func (ndp *ndpState) sendDADPacket(addr tcpip.Address, ref *referencedNetworkEnd
ns.SetTargetAddress(addr)
icmpData.SetChecksum(header.ICMPv6Checksum(icmpData, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
- pkt := NewPacketBuffer(PacketBufferOptions{
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
ReserveHeaderBytes: int(r.MaxHeaderLength()),
Data: buffer.View(icmpData).ToVectorisedView(),
})
sent := r.Stats().ICMP.V6PacketsSent
if err := r.WritePacket(nil,
- NetworkHeaderParams{
+ stack.NetworkHeaderParams{
Protocol: header.ICMPv6ProtocolNumber,
TTL: header.NDPHopLimit,
- TOS: DefaultTOS,
}, pkt,
); err != nil {
sent.Dropped.Increment()
@@ -778,11 +802,9 @@ func (ndp *ndpState) sendDADPacket(addr tcpip.Address, ref *referencedNetworkEnd
// such a state forever, unless some other external event resolves the DAD
// process (receiving an NA from the true owner of addr, or an NS for addr
// (implying another node is attempting to use addr)). It is up to the caller
-// of this function to handle such a scenario. Normally, addr will be removed
-// from n right after this function returns or the address successfully
-// resolved.
+// of this function to handle such a scenario.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) stopDuplicateAddressDetection(addr tcpip.Address) {
dad, ok := ndp.dad[addr]
if !ok {
@@ -801,30 +823,30 @@ func (ndp *ndpState) stopDuplicateAddressDetection(addr tcpip.Address) {
delete(ndp.dad, addr)
// Let the integrator know DAD did not resolve.
- if ndpDisp := ndp.nic.stack.ndpDisp; ndpDisp != nil {
- ndpDisp.OnDuplicateAddressDetectionStatus(ndp.nic.ID(), addr, false, nil)
+ if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+ ndpDisp.OnDuplicateAddressDetectionStatus(ndp.ep.nic.ID(), addr, false, nil)
}
}
// handleRA handles a Router Advertisement message that arrived on the NIC
// this ndp is for. Does nothing if the NIC is configured to not handle RAs.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) handleRA(ip tcpip.Address, ra header.NDPRouterAdvert) {
- // Is the NIC configured to handle RAs at all?
+ // Is the IPv6 endpoint configured to handle RAs at all?
//
// Currently, the stack does not determine router interface status on a
- // per-interface basis; it is a stack-wide configuration, so we check
- // stack's forwarding flag to determine if the NIC is a routing
- // interface.
- if !ndp.configs.HandleRAs || ndp.nic.stack.Forwarding(header.IPv6ProtocolNumber) {
+ // per-interface basis; it is a protocol-wide configuration, so we check the
+ // protocol's forwarding flag to determine if the IPv6 endpoint is forwarding
+ // packets.
+ if !ndp.configs.HandleRAs || ndp.ep.protocol.Forwarding() {
return
}
// Only worry about the DHCPv6 configuration if we have an NDPDispatcher as we
// only inform the dispatcher on configuration changes. We do nothing else
// with the information.
- if ndpDisp := ndp.nic.stack.ndpDisp; ndpDisp != nil {
+ if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
var configuration DHCPv6ConfigurationFromNDPRA
switch {
case ra.ManagedAddrConfFlag():
@@ -839,11 +861,11 @@ func (ndp *ndpState) handleRA(ip tcpip.Address, ra header.NDPRouterAdvert) {
if ndp.dhcpv6Configuration != configuration {
ndp.dhcpv6Configuration = configuration
- ndpDisp.OnDHCPv6Configuration(ndp.nic.ID(), configuration)
+ ndpDisp.OnDHCPv6Configuration(ndp.ep.nic.ID(), configuration)
}
}
- // Is the NIC configured to discover default routers?
+ // Is the IPv6 endpoint configured to discover default routers?
if ndp.configs.DiscoverDefaultRouters {
rtr, ok := ndp.defaultRouters[ip]
rl := ra.RouterLifetime()
@@ -881,20 +903,20 @@ func (ndp *ndpState) handleRA(ip tcpip.Address, ra header.NDPRouterAdvert) {
for opt, done, _ := it.Next(); !done; opt, done, _ = it.Next() {
switch opt := opt.(type) {
case header.NDPRecursiveDNSServer:
- if ndp.nic.stack.ndpDisp == nil {
+ if ndp.ep.protocol.ndpDisp == nil {
continue
}
addrs, _ := opt.Addresses()
- ndp.nic.stack.ndpDisp.OnRecursiveDNSServerOption(ndp.nic.ID(), addrs, opt.Lifetime())
+ ndp.ep.protocol.ndpDisp.OnRecursiveDNSServerOption(ndp.ep.nic.ID(), addrs, opt.Lifetime())
case header.NDPDNSSearchList:
- if ndp.nic.stack.ndpDisp == nil {
+ if ndp.ep.protocol.ndpDisp == nil {
continue
}
domainNames, _ := opt.DomainNames()
- ndp.nic.stack.ndpDisp.OnDNSSearchListOption(ndp.nic.ID(), domainNames, opt.Lifetime())
+ ndp.ep.protocol.ndpDisp.OnDNSSearchListOption(ndp.ep.nic.ID(), domainNames, opt.Lifetime())
case header.NDPPrefixInformation:
prefix := opt.Subnet()
@@ -928,7 +950,7 @@ func (ndp *ndpState) handleRA(ip tcpip.Address, ra header.NDPRouterAdvert) {
// invalidateDefaultRouter invalidates a discovered default router.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) invalidateDefaultRouter(ip tcpip.Address) {
rtr, ok := ndp.defaultRouters[ip]
@@ -942,32 +964,32 @@ func (ndp *ndpState) invalidateDefaultRouter(ip tcpip.Address) {
delete(ndp.defaultRouters, ip)
// Let the integrator know a discovered default router is invalidated.
- if ndpDisp := ndp.nic.stack.ndpDisp; ndpDisp != nil {
- ndpDisp.OnDefaultRouterInvalidated(ndp.nic.ID(), ip)
+ if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+ ndpDisp.OnDefaultRouterInvalidated(ndp.ep.nic.ID(), ip)
}
}
// rememberDefaultRouter remembers a newly discovered default router with IPv6
// link-local address ip with lifetime rl.
//
-// The router identified by ip MUST NOT already be known by the NIC.
+// The router identified by ip MUST NOT already be known by the IPv6 endpoint.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) rememberDefaultRouter(ip tcpip.Address, rl time.Duration) {
- ndpDisp := ndp.nic.stack.ndpDisp
+ ndpDisp := ndp.ep.protocol.ndpDisp
if ndpDisp == nil {
return
}
// Inform the integrator when we discovered a default router.
- if !ndpDisp.OnDefaultRouterDiscovered(ndp.nic.ID(), ip) {
+ if !ndpDisp.OnDefaultRouterDiscovered(ndp.ep.nic.ID(), ip) {
// Informed by the integrator to not remember the router, do
// nothing further.
return
}
state := defaultRouterState{
- invalidationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
+ invalidationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
ndp.invalidateDefaultRouter(ip)
}),
}
@@ -982,22 +1004,22 @@ func (ndp *ndpState) rememberDefaultRouter(ip tcpip.Address, rl time.Duration) {
//
// The prefix identified by prefix MUST NOT already be known.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) rememberOnLinkPrefix(prefix tcpip.Subnet, l time.Duration) {
- ndpDisp := ndp.nic.stack.ndpDisp
+ ndpDisp := ndp.ep.protocol.ndpDisp
if ndpDisp == nil {
return
}
// Inform the integrator when we discovered an on-link prefix.
- if !ndpDisp.OnOnLinkPrefixDiscovered(ndp.nic.ID(), prefix) {
+ if !ndpDisp.OnOnLinkPrefixDiscovered(ndp.ep.nic.ID(), prefix) {
// Informed by the integrator to not remember the prefix, do
// nothing further.
return
}
state := onLinkPrefixState{
- invalidationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
+ invalidationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
ndp.invalidateOnLinkPrefix(prefix)
}),
}
@@ -1011,7 +1033,7 @@ func (ndp *ndpState) rememberOnLinkPrefix(prefix tcpip.Subnet, l time.Duration)
// invalidateOnLinkPrefix invalidates a discovered on-link prefix.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) invalidateOnLinkPrefix(prefix tcpip.Subnet) {
s, ok := ndp.onLinkPrefixes[prefix]
@@ -1025,8 +1047,8 @@ func (ndp *ndpState) invalidateOnLinkPrefix(prefix tcpip.Subnet) {
delete(ndp.onLinkPrefixes, prefix)
// Let the integrator know a discovered on-link prefix is invalidated.
- if ndpDisp := ndp.nic.stack.ndpDisp; ndpDisp != nil {
- ndpDisp.OnOnLinkPrefixInvalidated(ndp.nic.ID(), prefix)
+ if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+ ndpDisp.OnOnLinkPrefixInvalidated(ndp.ep.nic.ID(), prefix)
}
}
@@ -1036,7 +1058,7 @@ func (ndp *ndpState) invalidateOnLinkPrefix(prefix tcpip.Subnet) {
// handleOnLinkPrefixInformation assumes that the prefix this pi is for is
// not the link-local prefix and the on-link flag is set.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) handleOnLinkPrefixInformation(pi header.NDPPrefixInformation) {
prefix := pi.Subnet()
prefixState, ok := ndp.onLinkPrefixes[prefix]
@@ -1089,7 +1111,7 @@ func (ndp *ndpState) handleOnLinkPrefixInformation(pi header.NDPPrefixInformatio
// handleAutonomousPrefixInformation assumes that the prefix this pi is for is
// not the link-local prefix and the autonomous flag is set.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) handleAutonomousPrefixInformation(pi header.NDPPrefixInformation) {
vl := pi.ValidLifetime()
pl := pi.PreferredLifetime()
@@ -1125,7 +1147,7 @@ func (ndp *ndpState) handleAutonomousPrefixInformation(pi header.NDPPrefixInform
//
// pl is the new preferred lifetime. vl is the new valid lifetime.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
// If we do not already have an address for this prefix and the valid
// lifetime is 0, no need to do anything further, as per RFC 4862
@@ -1142,15 +1164,15 @@ func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
}
state := slaacPrefixState{
- deprecationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
+ deprecationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
state, ok := ndp.slaacPrefixes[prefix]
if !ok {
panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for the deprecated SLAAC prefix %s", prefix))
}
- ndp.deprecateSLAACAddress(state.stableAddr.ref)
+ ndp.deprecateSLAACAddress(state.stableAddr.addressEndpoint)
}),
- invalidationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
+ invalidationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
state, ok := ndp.slaacPrefixes[prefix]
if !ok {
panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for the invalidated SLAAC prefix %s", prefix))
@@ -1189,7 +1211,7 @@ func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
}
// If the address is assigned (DAD resolved), generate a temporary address.
- if state.stableAddr.ref.getKind() == permanent {
+ if state.stableAddr.addressEndpoint.GetKind() == stack.Permanent {
// Reset the generation attempts counter as we are starting the generation
// of a new address for the SLAAC prefix.
ndp.generateTempSLAACAddr(prefix, &state, true /* resetGenAttempts */)
@@ -1198,32 +1220,27 @@ func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
ndp.slaacPrefixes[prefix] = state
}
-// addSLAACAddr adds a SLAAC address to the NIC.
+// addAndAcquireSLAACAddr adds a SLAAC address to the IPv6 endpoint.
//
-// The NIC that ndp belongs to MUST be locked.
-func (ndp *ndpState) addSLAACAddr(addr tcpip.AddressWithPrefix, configType networkEndpointConfigType, deprecated bool) *referencedNetworkEndpoint {
+// The IPv6 endpoint that ndp belongs to MUST be locked.
+func (ndp *ndpState) addAndAcquireSLAACAddr(addr tcpip.AddressWithPrefix, configType stack.AddressConfigType, deprecated bool) stack.AddressEndpoint {
// Inform the integrator that we have a new SLAAC address.
- ndpDisp := ndp.nic.stack.ndpDisp
+ ndpDisp := ndp.ep.protocol.ndpDisp
if ndpDisp == nil {
return nil
}
- if !ndpDisp.OnAutoGenAddress(ndp.nic.ID(), addr) {
+ if !ndpDisp.OnAutoGenAddress(ndp.ep.nic.ID(), addr) {
// Informed by the integrator not to add the address.
return nil
}
- protocolAddr := tcpip.ProtocolAddress{
- Protocol: header.IPv6ProtocolNumber,
- AddressWithPrefix: addr,
- }
-
- ref, err := ndp.nic.addAddressLocked(protocolAddr, FirstPrimaryEndpoint, permanent, configType, deprecated)
+ addressEndpoint, err := ndp.ep.addAndAcquirePermanentAddressLocked(addr, stack.FirstPrimaryEndpoint, configType, deprecated)
if err != nil {
- panic(fmt.Sprintf("ndp: error when adding SLAAC address %+v: %s", protocolAddr, err))
+ panic(fmt.Sprintf("ndp: error when adding SLAAC address %+v: %s", addr, err))
}
- return ref
+ return addressEndpoint
}
// generateSLAACAddr generates a SLAAC address for prefix.
@@ -1232,10 +1249,10 @@ func (ndp *ndpState) addSLAACAddr(addr tcpip.AddressWithPrefix, configType netwo
//
// Panics if the prefix is not a SLAAC prefix or it already has an address.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) generateSLAACAddr(prefix tcpip.Subnet, state *slaacPrefixState) bool {
- if r := state.stableAddr.ref; r != nil {
- panic(fmt.Sprintf("ndp: SLAAC prefix %s already has a permenant address %s", prefix, r.addrWithPrefix()))
+ if addressEndpoint := state.stableAddr.addressEndpoint; addressEndpoint != nil {
+ panic(fmt.Sprintf("ndp: SLAAC prefix %s already has a permenant address %s", prefix, addressEndpoint.AddressWithPrefix()))
}
// If we have already reached the maximum address generation attempts for the
@@ -1255,11 +1272,11 @@ func (ndp *ndpState) generateSLAACAddr(prefix tcpip.Subnet, state *slaacPrefixSt
}
dadCounter := state.generationAttempts + state.stableAddr.localGenerationFailures
- if oIID := ndp.nic.stack.opaqueIIDOpts; oIID.NICNameFromID != nil {
+ if oIID := ndp.ep.protocol.opaqueIIDOpts; oIID.NICNameFromID != nil {
addrBytes = header.AppendOpaqueInterfaceIdentifier(
addrBytes[:header.IIDOffsetInIPv6Address],
prefix,
- oIID.NICNameFromID(ndp.nic.ID(), ndp.nic.name),
+ oIID.NICNameFromID(ndp.ep.nic.ID(), ndp.ep.nic.Name()),
dadCounter,
oIID.SecretKey,
)
@@ -1272,7 +1289,7 @@ func (ndp *ndpState) generateSLAACAddr(prefix tcpip.Subnet, state *slaacPrefixSt
//
// TODO(b/141011931): Validate a LinkEndpoint's link address (provided by
// LinkEndpoint.LinkAddress) before reaching this point.
- linkAddr := ndp.nic.linkEP.LinkAddress()
+ linkAddr := ndp.ep.linkEP.LinkAddress()
if !header.IsValidUnicastEthernetAddress(linkAddr) {
return false
}
@@ -1291,15 +1308,15 @@ func (ndp *ndpState) generateSLAACAddr(prefix tcpip.Subnet, state *slaacPrefixSt
PrefixLen: validPrefixLenForAutoGen,
}
- if !ndp.nic.hasPermanentAddrLocked(generatedAddr.Address) {
+ if !ndp.ep.hasPermanentAddressRLocked(generatedAddr.Address) {
break
}
state.stableAddr.localGenerationFailures++
}
- if ref := ndp.addSLAACAddr(generatedAddr, slaac, time.Since(state.preferredUntil) >= 0 /* deprecated */); ref != nil {
- state.stableAddr.ref = ref
+ if addressEndpoint := ndp.addAndAcquireSLAACAddr(generatedAddr, stack.AddressConfigSlaac, time.Since(state.preferredUntil) >= 0 /* deprecated */); addressEndpoint != nil {
+ state.stableAddr.addressEndpoint = addressEndpoint
state.generationAttempts++
return true
}
@@ -1309,10 +1326,9 @@ func (ndp *ndpState) generateSLAACAddr(prefix tcpip.Subnet, state *slaacPrefixSt
// regenerateSLAACAddr regenerates an address for a SLAAC prefix.
//
-// If generating a new address for the prefix fails, the prefix will be
-// invalidated.
+// If generating a new address for the prefix fails, the prefix is invalidated.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) regenerateSLAACAddr(prefix tcpip.Subnet) {
state, ok := ndp.slaacPrefixes[prefix]
if !ok {
@@ -1332,7 +1348,7 @@ func (ndp *ndpState) regenerateSLAACAddr(prefix tcpip.Subnet) {
// generateTempSLAACAddr generates a new temporary SLAAC address.
//
-// If resetGenAttempts is true, the prefix's generation counter will be reset.
+// If resetGenAttempts is true, the prefix's generation counter is reset.
//
// Returns true if a new address was generated.
func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *slaacPrefixState, resetGenAttempts bool) bool {
@@ -1353,7 +1369,7 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
return false
}
- stableAddr := prefixState.stableAddr.ref.address()
+ stableAddr := prefixState.stableAddr.addressEndpoint.AddressWithPrefix().Address
now := time.Now()
// As per RFC 4941 section 3.3 step 4, the valid lifetime of a temporary
@@ -1392,7 +1408,8 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
return false
}
- // Attempt to generate a new address that is not already assigned to the NIC.
+ // Attempt to generate a new address that is not already assigned to the IPv6
+ // endpoint.
var generatedAddr tcpip.AddressWithPrefix
for i := 0; ; i++ {
// If we were unable to generate an address after the maximum SLAAC address
@@ -1402,7 +1419,7 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
}
generatedAddr = header.GenerateTempIPv6SLAACAddr(ndp.temporaryIIDHistory[:], stableAddr)
- if !ndp.nic.hasPermanentAddrLocked(generatedAddr.Address) {
+ if !ndp.ep.hasPermanentAddressRLocked(generatedAddr.Address) {
break
}
}
@@ -1410,13 +1427,13 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
// As per RFC RFC 4941 section 3.3 step 5, we MUST NOT create a temporary
// address with a zero preferred lifetime. The checks above ensure this
// so we know the address is not deprecated.
- ref := ndp.addSLAACAddr(generatedAddr, slaacTemp, false /* deprecated */)
- if ref == nil {
+ addressEndpoint := ndp.addAndAcquireSLAACAddr(generatedAddr, stack.AddressConfigSlaacTemp, false /* deprecated */)
+ if addressEndpoint == nil {
return false
}
state := tempSLAACAddrState{
- deprecationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
+ deprecationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
prefixState, ok := ndp.slaacPrefixes[prefix]
if !ok {
panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for %s to deprecate temporary address %s", prefix, generatedAddr))
@@ -1427,9 +1444,9 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
panic(fmt.Sprintf("ndp: must have a tempAddr entry to deprecate temporary address %s", generatedAddr))
}
- ndp.deprecateSLAACAddress(tempAddrState.ref)
+ ndp.deprecateSLAACAddress(tempAddrState.addressEndpoint)
}),
- invalidationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
+ invalidationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
prefixState, ok := ndp.slaacPrefixes[prefix]
if !ok {
panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for %s to invalidate temporary address %s", prefix, generatedAddr))
@@ -1442,7 +1459,7 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
ndp.invalidateTempSLAACAddr(prefixState.tempAddrs, generatedAddr.Address, tempAddrState)
}),
- regenJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
+ regenJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
prefixState, ok := ndp.slaacPrefixes[prefix]
if !ok {
panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for %s to regenerate temporary address after %s", prefix, generatedAddr))
@@ -1465,8 +1482,8 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
prefixState.tempAddrs[generatedAddr.Address] = tempAddrState
ndp.slaacPrefixes[prefix] = prefixState
}),
- createdAt: now,
- ref: ref,
+ createdAt: now,
+ addressEndpoint: addressEndpoint,
}
state.deprecationJob.Schedule(pl)
@@ -1481,7 +1498,7 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
// regenerateTempSLAACAddr regenerates a temporary address for a SLAAC prefix.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) regenerateTempSLAACAddr(prefix tcpip.Subnet, resetGenAttempts bool) {
state, ok := ndp.slaacPrefixes[prefix]
if !ok {
@@ -1496,14 +1513,14 @@ func (ndp *ndpState) regenerateTempSLAACAddr(prefix tcpip.Subnet, resetGenAttemp
//
// pl is the new preferred lifetime. vl is the new valid lifetime.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixState *slaacPrefixState, pl, vl time.Duration) {
// If the preferred lifetime is zero, then the prefix should be deprecated.
deprecated := pl == 0
if deprecated {
- ndp.deprecateSLAACAddress(prefixState.stableAddr.ref)
+ ndp.deprecateSLAACAddress(prefixState.stableAddr.addressEndpoint)
} else {
- prefixState.stableAddr.ref.deprecated = false
+ prefixState.stableAddr.addressEndpoint.SetDeprecated(false)
}
// If prefix was preferred for some finite lifetime before, cancel the
@@ -1565,7 +1582,7 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
// If DAD is not yet complete on the stable address, there is no need to do
// work with temporary addresses.
- if prefixState.stableAddr.ref.getKind() != permanent {
+ if prefixState.stableAddr.addressEndpoint.GetKind() != stack.Permanent {
return
}
@@ -1608,9 +1625,9 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
newPreferredLifetime := preferredUntil.Sub(now)
tempAddrState.deprecationJob.Cancel()
if newPreferredLifetime <= 0 {
- ndp.deprecateSLAACAddress(tempAddrState.ref)
+ ndp.deprecateSLAACAddress(tempAddrState.addressEndpoint)
} else {
- tempAddrState.ref.deprecated = false
+ tempAddrState.addressEndpoint.SetDeprecated(false)
tempAddrState.deprecationJob.Schedule(newPreferredLifetime)
}
@@ -1635,8 +1652,8 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
// due to an update in preferred lifetime.
//
// If each temporay address has already been regenerated, no new temporary
- // address will be generated. To ensure continuation of temporary SLAAC
- // addresses, we manually try to regenerate an address here.
+ // address is generated. To ensure continuation of temporary SLAAC addresses,
+ // we manually try to regenerate an address here.
if len(regenForAddr) != 0 || allAddressesRegenerated {
// Reset the generation attempts counter as we are starting the generation
// of a new address for the SLAAC prefix.
@@ -1647,57 +1664,58 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
}
}
-// deprecateSLAACAddress marks ref as deprecated and notifies the stack's NDP
-// dispatcher that ref has been deprecated.
+// deprecateSLAACAddress marks the address as deprecated and notifies the NDP
+// dispatcher that address has been deprecated.
//
-// deprecateSLAACAddress does nothing if ref is already deprecated.
+// deprecateSLAACAddress does nothing if the address is already deprecated.
//
-// The NIC that ndp belongs to MUST be locked.
-func (ndp *ndpState) deprecateSLAACAddress(ref *referencedNetworkEndpoint) {
- if ref.deprecated {
+// The IPv6 endpoint that ndp belongs to MUST be locked.
+func (ndp *ndpState) deprecateSLAACAddress(addressEndpoint stack.AddressEndpoint) {
+ if addressEndpoint.Deprecated() {
return
}
- ref.deprecated = true
- if ndpDisp := ndp.nic.stack.ndpDisp; ndpDisp != nil {
- ndpDisp.OnAutoGenAddressDeprecated(ndp.nic.ID(), ref.addrWithPrefix())
+ addressEndpoint.SetDeprecated(true)
+ if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+ ndpDisp.OnAutoGenAddressDeprecated(ndp.ep.nic.ID(), addressEndpoint.AddressWithPrefix())
}
}
// invalidateSLAACPrefix invalidates a SLAAC prefix.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) invalidateSLAACPrefix(prefix tcpip.Subnet, state slaacPrefixState) {
- if r := state.stableAddr.ref; r != nil {
+ ndp.cleanupSLAACPrefixResources(prefix, state)
+
+ if addressEndpoint := state.stableAddr.addressEndpoint; addressEndpoint != nil {
// Since we are already invalidating the prefix, do not invalidate the
// prefix when removing the address.
- if err := ndp.nic.removePermanentIPv6EndpointLocked(r, false /* allowSLAACInvalidation */); err != nil {
- panic(fmt.Sprintf("ndp: error removing stable SLAAC address %s: %s", r.addrWithPrefix(), err))
+ if err := ndp.ep.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */); err != nil {
+ panic(fmt.Sprintf("ndp: error removing stable SLAAC address %s: %s", addressEndpoint.AddressWithPrefix(), err))
}
}
-
- ndp.cleanupSLAACPrefixResources(prefix, state)
}
// cleanupSLAACAddrResourcesAndNotify cleans up an invalidated SLAAC address's
// resources.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) cleanupSLAACAddrResourcesAndNotify(addr tcpip.AddressWithPrefix, invalidatePrefix bool) {
- if ndpDisp := ndp.nic.stack.ndpDisp; ndpDisp != nil {
- ndpDisp.OnAutoGenAddressInvalidated(ndp.nic.ID(), addr)
+ if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+ ndpDisp.OnAutoGenAddressInvalidated(ndp.ep.nic.ID(), addr)
}
prefix := addr.Subnet()
state, ok := ndp.slaacPrefixes[prefix]
- if !ok || state.stableAddr.ref == nil || addr.Address != state.stableAddr.ref.address() {
+ if !ok || state.stableAddr.addressEndpoint == nil || addr.Address != state.stableAddr.addressEndpoint.AddressWithPrefix().Address {
return
}
if !invalidatePrefix {
// If the prefix is not being invalidated, disassociate the address from the
// prefix and do nothing further.
- state.stableAddr.ref = nil
+ state.stableAddr.addressEndpoint.DecRef()
+ state.stableAddr.addressEndpoint = nil
ndp.slaacPrefixes[prefix] = state
return
}
@@ -1709,14 +1727,17 @@ func (ndp *ndpState) cleanupSLAACAddrResourcesAndNotify(addr tcpip.AddressWithPr
//
// Panics if the SLAAC prefix is not known.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) cleanupSLAACPrefixResources(prefix tcpip.Subnet, state slaacPrefixState) {
// Invalidate all temporary addresses.
for tempAddr, tempAddrState := range state.tempAddrs {
ndp.invalidateTempSLAACAddr(state.tempAddrs, tempAddr, tempAddrState)
}
- state.stableAddr.ref = nil
+ if state.stableAddr.addressEndpoint != nil {
+ state.stableAddr.addressEndpoint.DecRef()
+ state.stableAddr.addressEndpoint = nil
+ }
state.deprecationJob.Cancel()
state.invalidationJob.Cancel()
delete(ndp.slaacPrefixes, prefix)
@@ -1724,12 +1745,12 @@ func (ndp *ndpState) cleanupSLAACPrefixResources(prefix tcpip.Subnet, state slaa
// invalidateTempSLAACAddr invalidates a temporary SLAAC address.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) invalidateTempSLAACAddr(tempAddrs map[tcpip.Address]tempSLAACAddrState, tempAddr tcpip.Address, tempAddrState tempSLAACAddrState) {
// Since we are already invalidating the address, do not invalidate the
// address when removing the address.
- if err := ndp.nic.removePermanentIPv6EndpointLocked(tempAddrState.ref, false /* allowSLAACInvalidation */); err != nil {
- panic(fmt.Sprintf("error removing temporary SLAAC address %s: %s", tempAddrState.ref.addrWithPrefix(), err))
+ if err := ndp.ep.removePermanentEndpointLocked(tempAddrState.addressEndpoint, false /* allowSLAACInvalidation */); err != nil {
+ panic(fmt.Sprintf("error removing temporary SLAAC address %s: %s", tempAddrState.addressEndpoint.AddressWithPrefix(), err))
}
ndp.cleanupTempSLAACAddrResources(tempAddrs, tempAddr, tempAddrState)
@@ -1738,10 +1759,10 @@ func (ndp *ndpState) invalidateTempSLAACAddr(tempAddrs map[tcpip.Address]tempSLA
// cleanupTempSLAACAddrResourcesAndNotify cleans up an invalidated temporary
// SLAAC address's resources from ndp.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) cleanupTempSLAACAddrResourcesAndNotify(addr tcpip.AddressWithPrefix, invalidateAddr bool) {
- if ndpDisp := ndp.nic.stack.ndpDisp; ndpDisp != nil {
- ndpDisp.OnAutoGenAddressInvalidated(ndp.nic.ID(), addr)
+ if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+ ndpDisp.OnAutoGenAddressInvalidated(ndp.ep.nic.ID(), addr)
}
if !invalidateAddr {
@@ -1765,35 +1786,29 @@ func (ndp *ndpState) cleanupTempSLAACAddrResourcesAndNotify(addr tcpip.AddressWi
// cleanupTempSLAACAddrResourcesAndNotify cleans up a temporary SLAAC address's
// jobs and entry.
//
-// The NIC that ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) cleanupTempSLAACAddrResources(tempAddrs map[tcpip.Address]tempSLAACAddrState, tempAddr tcpip.Address, tempAddrState tempSLAACAddrState) {
+ tempAddrState.addressEndpoint.DecRef()
+ tempAddrState.addressEndpoint = nil
tempAddrState.deprecationJob.Cancel()
tempAddrState.invalidationJob.Cancel()
tempAddrState.regenJob.Cancel()
delete(tempAddrs, tempAddr)
}
-// cleanupState cleans up ndp's state.
-//
-// If hostOnly is true, then only host-specific state will be cleaned up.
+// removeSLAACAddresses removes all SLAAC addresses.
//
-// cleanupState MUST be called with hostOnly set to true when ndp's NIC is
-// transitioning from a host to a router. This function will invalidate all
-// discovered on-link prefixes, discovered routers, and auto-generated
-// addresses.
-//
-// If hostOnly is true, then the link-local auto-generated address will not be
-// invalidated as routers are also expected to generate a link-local address.
+// If keepLinkLocal is false, the SLAAC generated link-local address is removed.
//
-// The NIC that ndp belongs to MUST be locked.
-func (ndp *ndpState) cleanupState(hostOnly bool) {
+// The IPv6 endpoint that ndp belongs to MUST be locked.
+func (ndp *ndpState) removeSLAACAddresses(keepLinkLocal bool) {
linkLocalSubnet := header.IPv6LinkLocalPrefix.Subnet()
- linkLocalPrefixes := 0
+ var linkLocalPrefixes int
for prefix, state := range ndp.slaacPrefixes {
// RFC 4862 section 5 states that routers are also expected to generate a
// link-local address so we do not invalidate them if we are cleaning up
// host-only state.
- if hostOnly && prefix == linkLocalSubnet {
+ if keepLinkLocal && prefix == linkLocalSubnet {
linkLocalPrefixes++
continue
}
@@ -1804,6 +1819,21 @@ func (ndp *ndpState) cleanupState(hostOnly bool) {
if got := len(ndp.slaacPrefixes); got != linkLocalPrefixes {
panic(fmt.Sprintf("ndp: still have non-linklocal SLAAC prefixes after cleaning up; found = %d prefixes, of which %d are link-local", got, linkLocalPrefixes))
}
+}
+
+// cleanupState cleans up ndp's state.
+//
+// If hostOnly is true, then only host-specific state is cleaned up.
+//
+// This function invalidates all discovered on-link prefixes, discovered
+// routers, and auto-generated addresses.
+//
+// If hostOnly is true, then the link-local auto-generated address aren't
+// invalidated as routers are also expected to generate a link-local address.
+//
+// The IPv6 endpoint that ndp belongs to MUST be locked.
+func (ndp *ndpState) cleanupState(hostOnly bool) {
+ ndp.removeSLAACAddresses(hostOnly /* keepLinkLocal */)
for prefix := range ndp.onLinkPrefixes {
ndp.invalidateOnLinkPrefix(prefix)
@@ -1827,7 +1857,7 @@ func (ndp *ndpState) cleanupState(hostOnly bool) {
// startSolicitingRouters starts soliciting routers, as per RFC 4861 section
// 6.3.7. If routers are already being solicited, this function does nothing.
//
-// The NIC ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) startSolicitingRouters() {
if ndp.rtrSolicit.timer != nil {
// We are already soliciting routers.
@@ -1848,27 +1878,37 @@ func (ndp *ndpState) startSolicitingRouters() {
var done bool
ndp.rtrSolicit.done = &done
- ndp.rtrSolicit.timer = ndp.nic.stack.Clock().AfterFunc(delay, func() {
- ndp.nic.mu.Lock()
+ ndp.rtrSolicit.timer = ndp.ep.protocol.stack.Clock().AfterFunc(delay, func() {
+ ndp.ep.mu.Lock()
if done {
// If we reach this point, it means that the RS timer fired after another
- // goroutine already obtained the NIC lock and stopped solicitations.
- // Simply return here and do nothing further.
- ndp.nic.mu.Unlock()
+ // goroutine already obtained the IPv6 endpoint lock and stopped
+ // solicitations. Simply return here and do nothing further.
+ ndp.ep.mu.Unlock()
return
}
// As per RFC 4861 section 4.1, the source of the RS is an address assigned
// to the sending interface, or the unspecified address if no address is
// assigned to the sending interface.
- ref := ndp.nic.primaryIPv6EndpointRLocked(header.IPv6AllRoutersMulticastAddress)
- if ref == nil {
- ref = ndp.nic.getRefOrCreateTempLocked(header.IPv6ProtocolNumber, header.IPv6Any, NeverPrimaryEndpoint)
+ addressEndpoint := ndp.ep.acquirePrimaryAddressRLocked(header.IPv6AllRoutersMulticastAddress, false)
+ if addressEndpoint == nil {
+ // Incase this ends up creating a new temporary address, we need to hold
+ // onto the endpoint until a route is obtained. If we decrement the
+ // reference count before obtaing a route, the address's resources would
+ // be released and attempting to obtain a route after would fail. Once a
+ // route is obtainted, it is safe to decrement the reference count since
+ // obtaining a route increments the address's reference count.
+ addressEndpoint = ndp.ep.acquireAddressOrCreateTempLocked(header.IPv6Any, true /* createTemp */, stack.NeverPrimaryEndpoint)
}
- ndp.nic.mu.Unlock()
+ ndp.ep.mu.Unlock()
- localAddr := ref.address()
- r := makeRoute(header.IPv6ProtocolNumber, localAddr, header.IPv6AllRoutersMulticastAddress, ndp.nic.linkEP.LinkAddress(), ref, false, false)
+ localAddr := addressEndpoint.AddressWithPrefix().Address
+ r, err := ndp.ep.protocol.stack.FindRoute(ndp.ep.nic.ID(), localAddr, header.IPv6AllRoutersMulticastAddress, ProtocolNumber, false /* multicastLoop */)
+ addressEndpoint.DecRef()
+ if err != nil {
+ return
+ }
defer r.Release()
// Route should resolve immediately since
@@ -1876,15 +1916,16 @@ func (ndp *ndpState) startSolicitingRouters() {
// remote link address can be calculated without a resolution process.
if c, err := r.Resolve(nil); err != nil {
// Do not consider the NIC being unknown or disabled as a fatal error.
- // Since this method is required to be called when the NIC is not locked,
- // the NIC could have been disabled or removed by another goroutine.
+ // Since this method is required to be called when the IPv6 endpoint is
+ // not locked, the IPv6 endpoint could have been disabled or removed by
+ // another goroutine.
if err == tcpip.ErrUnknownNICID || err == tcpip.ErrInvalidEndpointState {
return
}
- panic(fmt.Sprintf("ndp: error when resolving route to send NDP RS (%s -> %s on NIC(%d)): %s", header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.nic.ID(), err))
+ panic(fmt.Sprintf("ndp: error when resolving route to send NDP RS (%s -> %s on NIC(%d)): %s", header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.ep.nic.ID(), err))
} else if c != nil {
- panic(fmt.Sprintf("ndp: route resolution not immediate for route to send NDP RS (%s -> %s on NIC(%d))", header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.nic.ID()))
+ panic(fmt.Sprintf("ndp: route resolution not immediate for route to send NDP RS (%s -> %s on NIC(%d))", header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.ep.nic.ID()))
}
// As per RFC 4861 section 4.1, an NDP RS SHOULD include the source
@@ -1907,21 +1948,20 @@ func (ndp *ndpState) startSolicitingRouters() {
rs.Options().Serialize(optsSerializer)
icmpData.SetChecksum(header.ICMPv6Checksum(icmpData, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
- pkt := NewPacketBuffer(PacketBufferOptions{
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
ReserveHeaderBytes: int(r.MaxHeaderLength()),
Data: buffer.View(icmpData).ToVectorisedView(),
})
sent := r.Stats().ICMP.V6PacketsSent
if err := r.WritePacket(nil,
- NetworkHeaderParams{
+ stack.NetworkHeaderParams{
Protocol: header.ICMPv6ProtocolNumber,
TTL: header.NDPHopLimit,
- TOS: DefaultTOS,
}, pkt,
); err != nil {
sent.Dropped.Increment()
- log.Printf("startSolicitingRouters: error writing NDP router solicit message on NIC(%d); err = %s", ndp.nic.ID(), err)
+ log.Printf("startSolicitingRouters: error writing NDP router solicit message on NIC(%d); err = %s", ndp.ep.nic.ID(), err)
// Don't send any more messages if we had an error.
remaining = 0
} else {
@@ -1929,19 +1969,19 @@ func (ndp *ndpState) startSolicitingRouters() {
remaining--
}
- ndp.nic.mu.Lock()
+ ndp.ep.mu.Lock()
if done || remaining == 0 {
ndp.rtrSolicit.timer = nil
ndp.rtrSolicit.done = nil
} else if ndp.rtrSolicit.timer != nil {
// Note, we need to explicitly check to make sure that
// the timer field is not nil because if it was nil but
- // we still reached this point, then we know the NIC
+ // we still reached this point, then we know the IPv6 endpoint
// was requested to stop soliciting routers so we don't
// need to send the next Router Solicitation message.
ndp.rtrSolicit.timer.Reset(ndp.configs.RtrSolicitationInterval)
}
- ndp.nic.mu.Unlock()
+ ndp.ep.mu.Unlock()
})
}
@@ -1949,7 +1989,7 @@ func (ndp *ndpState) startSolicitingRouters() {
// stopSolicitingRouters stops soliciting routers. If routers are not currently
// being solicited, this function does nothing.
//
-// The NIC ndp belongs to MUST be locked.
+// The IPv6 endpoint that ndp belongs to MUST be locked.
func (ndp *ndpState) stopSolicitingRouters() {
if ndp.rtrSolicit.timer == nil {
// Nothing to do.
@@ -1965,7 +2005,7 @@ func (ndp *ndpState) stopSolicitingRouters() {
// initializeTempAddrState initializes state related to temporary SLAAC
// addresses.
func (ndp *ndpState) initializeTempAddrState() {
- header.InitialTempIID(ndp.temporaryIIDHistory[:], ndp.nic.stack.tempIIDSeed, ndp.nic.ID())
+ header.InitialTempIID(ndp.temporaryIIDHistory[:], ndp.ep.protocol.tempIIDSeed, ndp.ep.nic.ID())
if MaxDesyncFactor != 0 {
ndp.temporaryAddressDesyncFactor = time.Duration(rand.Int63n(int64(MaxDesyncFactor)))
diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go
index 7434df4a1..25464a03a 100644
--- a/pkg/tcpip/network/ipv6/ndp_test.go
+++ b/pkg/tcpip/network/ipv6/ndp_test.go
@@ -17,6 +17,7 @@ package ipv6
import (
"strings"
"testing"
+ "time"
"github.com/google/go-cmp/cmp"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -35,8 +36,8 @@ func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address, useNeig
t.Helper()
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol6},
UseNeighborCache: useNeighborCache,
})
@@ -65,10 +66,94 @@ func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address, useNeig
t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber)
}
- ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubNUDHandler{}, &stubDispatcher{}, nil, s)
+ ep := netProto.NewEndpoint(&testInterface{}, &stubLinkAddressCache{}, &stubNUDHandler{}, &stubDispatcher{})
+ if err := ep.Enable(); err != nil {
+ t.Fatalf("ep.Enable(): %s", err)
+ }
+ t.Cleanup(ep.Close)
+
return s, ep
}
+var _ NDPDispatcher = (*testNDPDispatcher)(nil)
+
+// testNDPDispatcher is an NDPDispatcher only allows default router discovery.
+type testNDPDispatcher struct {
+ addr tcpip.Address
+}
+
+func (*testNDPDispatcher) OnDuplicateAddressDetectionStatus(tcpip.NICID, tcpip.Address, bool, *tcpip.Error) {
+}
+
+func (t *testNDPDispatcher) OnDefaultRouterDiscovered(_ tcpip.NICID, addr tcpip.Address) bool {
+ t.addr = addr
+ return true
+}
+
+func (t *testNDPDispatcher) OnDefaultRouterInvalidated(_ tcpip.NICID, addr tcpip.Address) {
+ t.addr = addr
+}
+
+func (*testNDPDispatcher) OnOnLinkPrefixDiscovered(tcpip.NICID, tcpip.Subnet) bool {
+ return false
+}
+
+func (*testNDPDispatcher) OnOnLinkPrefixInvalidated(tcpip.NICID, tcpip.Subnet) {
+}
+
+func (*testNDPDispatcher) OnAutoGenAddress(tcpip.NICID, tcpip.AddressWithPrefix) bool {
+ return false
+}
+
+func (*testNDPDispatcher) OnAutoGenAddressDeprecated(tcpip.NICID, tcpip.AddressWithPrefix) {
+}
+
+func (*testNDPDispatcher) OnAutoGenAddressInvalidated(tcpip.NICID, tcpip.AddressWithPrefix) {
+}
+
+func (*testNDPDispatcher) OnRecursiveDNSServerOption(tcpip.NICID, []tcpip.Address, time.Duration) {
+}
+
+func (*testNDPDispatcher) OnDNSSearchListOption(tcpip.NICID, []string, time.Duration) {
+}
+
+func (*testNDPDispatcher) OnDHCPv6Configuration(tcpip.NICID, DHCPv6ConfigurationFromNDPRA) {
+}
+
+func TestStackNDPEndpointInvalidateDefaultRouter(t *testing.T) {
+ var ndpDisp testNDPDispatcher
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocolWithOptions(Options{
+ NDPDisp: &ndpDisp,
+ })},
+ })
+
+ if err := s.CreateNIC(nicID, &stubLinkEndpoint{}); err != nil {
+ t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err)
+ }
+
+ ep, err := s.GetNetworkEndpoint(nicID, ProtocolNumber)
+ if err != nil {
+ t.Fatalf("s.GetNetworkEndpoint(%d, %d): %s", nicID, ProtocolNumber, err)
+ }
+
+ ipv6EP := ep.(*endpoint)
+ ipv6EP.mu.Lock()
+ ipv6EP.mu.ndp.rememberDefaultRouter(lladdr1, time.Hour)
+ ipv6EP.mu.Unlock()
+
+ if ndpDisp.addr != lladdr1 {
+ t.Fatalf("got ndpDisp.addr = %s, want = %s", ndpDisp.addr, lladdr1)
+ }
+
+ ndpDisp.addr = ""
+ ndpEP := ep.(stack.NDPEndpoint)
+ ndpEP.InvalidateDefaultRouter(lladdr1)
+ if ndpDisp.addr != lladdr1 {
+ t.Fatalf("got ndpDisp.addr = %s, want = %s", ndpDisp.addr, lladdr1)
+ }
+}
+
// TestNeighorSolicitationWithSourceLinkLayerOption tests that receiving a
// valid NDP NS message with the Source Link Layer Address option results in a
// new entry in the link address cache for the sender of the message.
@@ -98,7 +183,7 @@ func TestNeighorSolicitationWithSourceLinkLayerOption(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
})
e := channel.New(0, 1280, linkAddr0)
if err := s.CreateNIC(nicID, e); err != nil {
@@ -202,7 +287,7 @@ func TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testi
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
UseNeighborCache: true,
})
e := channel.New(0, 1280, linkAddr0)
@@ -475,7 +560,7 @@ func TestNeighorSolicitationResponse(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
UseNeighborCache: stackTyp.useNeighborCache,
})
e := channel.New(1, 1280, nicLinkAddr)
@@ -596,7 +681,7 @@ func TestNeighorAdvertisementWithTargetLinkLayerOption(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
})
e := channel.New(0, 1280, linkAddr0)
if err := s.CreateNIC(nicID, e); err != nil {
@@ -707,7 +792,7 @@ func TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache(t *test
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
UseNeighborCache: true,
})
e := channel.New(0, 1280, linkAddr0)
@@ -1172,7 +1257,7 @@ func TestRouterAdvertValidation(t *testing.T) {
e := channel.New(10, 1280, linkAddr1)
e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
UseNeighborCache: stackTyp.useNeighborCache,
})
diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go
index 91fc26722..51d428049 100644
--- a/pkg/tcpip/sample/tun_tcp_connect/main.go
+++ b/pkg/tcpip/sample/tun_tcp_connect/main.go
@@ -127,8 +127,8 @@ func main() {
// Create the stack with ipv4 and tcp protocols, then add a tun-based
// NIC and ipv4 address.
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
})
mtu, err := rawfile.GetMTU(tunName)
diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go
index 3f58a15ea..8e0ee1cd7 100644
--- a/pkg/tcpip/sample/tun_tcp_echo/main.go
+++ b/pkg/tcpip/sample/tun_tcp_echo/main.go
@@ -112,8 +112,8 @@ func main() {
// Create the stack with ip and tcp protocols, then add a tun-based
// NIC and address.
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol, arp.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
})
mtu, err := rawfile.GetMTU(tunName)
diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD
index 7f1d79115..2eaeab779 100644
--- a/pkg/tcpip/stack/BUILD
+++ b/pkg/tcpip/stack/BUILD
@@ -54,8 +54,8 @@ go_template_instance(
go_library(
name = "stack",
srcs = [
+ "addressable_endpoint_state.go",
"conntrack.go",
- "dhcpv6configurationfromndpra_string.go",
"forwarder.go",
"headertype_string.go",
"icmp_rate_limit.go",
@@ -65,7 +65,6 @@ go_library(
"iptables_types.go",
"linkaddrcache.go",
"linkaddrentry_list.go",
- "ndp.go",
"neighbor_cache.go",
"neighbor_entry.go",
"neighbor_entry_list.go",
@@ -106,6 +105,7 @@ go_test(
name = "stack_x_test",
size = "medium",
srcs = [
+ "addressable_endpoint_state_test.go",
"ndp_test.go",
"nud_test.go",
"stack_test.go",
@@ -116,6 +116,7 @@ go_test(
deps = [
":stack",
"//pkg/rand",
+ "//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/checker",
diff --git a/pkg/tcpip/stack/addressable_endpoint_state.go b/pkg/tcpip/stack/addressable_endpoint_state.go
new file mode 100644
index 000000000..270ac4977
--- /dev/null
+++ b/pkg/tcpip/stack/addressable_endpoint_state.go
@@ -0,0 +1,717 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stack
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/tcpip"
+)
+
+var _ GroupAddressableEndpoint = (*AddressableEndpointState)(nil)
+var _ AddressableEndpoint = (*AddressableEndpointState)(nil)
+
+// AddressableEndpointState is an implementation of an AddressableEndpoint.
+type AddressableEndpointState struct {
+ networkEndpoint NetworkEndpoint
+
+ // Lock ordering (from outer to inner lock ordering):
+ //
+ // AddressableEndpointState.mu
+ // addressState.mu
+ mu struct {
+ sync.RWMutex
+
+ endpoints map[tcpip.Address]*addressState
+ primary []*addressState
+
+ // groups holds the mapping between group addresses and the number of times
+ // they have been joined.
+ groups map[tcpip.Address]uint32
+ }
+}
+
+// Init initializes the AddressableEndpointState with networkEndpoint.
+//
+// Must be called before calling any other function on m.
+func (a *AddressableEndpointState) Init(networkEndpoint NetworkEndpoint) {
+ a.networkEndpoint = networkEndpoint
+
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ a.mu.endpoints = make(map[tcpip.Address]*addressState)
+ a.mu.groups = make(map[tcpip.Address]uint32)
+}
+
+// ReadOnlyAddressableEndpointState provides read-only access to an
+// AddressableEndpointState.
+type ReadOnlyAddressableEndpointState struct {
+ inner *AddressableEndpointState
+}
+
+// AddrOrMatching returns an endpoint for the passed address that is consisdered
+// bound to the wrapped AddressableEndpointState.
+//
+// If addr is an exact match with an existing address, that address is returned.
+// Otherwise, f is called with each address and the address that f returns true
+// for is returned.
+//
+// Returns nil of no address matches.
+func (m ReadOnlyAddressableEndpointState) AddrOrMatching(addr tcpip.Address, spoofingOrPrimiscuous bool, f func(AddressEndpoint) bool) AddressEndpoint {
+ m.inner.mu.RLock()
+ defer m.inner.mu.RUnlock()
+
+ if ep, ok := m.inner.mu.endpoints[addr]; ok {
+ if ep.IsAssigned(spoofingOrPrimiscuous) && ep.IncRef() {
+ return ep
+ }
+ }
+
+ for _, ep := range m.inner.mu.endpoints {
+ if ep.IsAssigned(spoofingOrPrimiscuous) && f(ep) && ep.IncRef() {
+ return ep
+ }
+ }
+
+ return nil
+}
+
+// Lookup returns the AddressEndpoint for the passed address.
+//
+// Returns nil if the passed address is not associated with the
+// AddressableEndpointState.
+func (m ReadOnlyAddressableEndpointState) Lookup(addr tcpip.Address) AddressEndpoint {
+ m.inner.mu.RLock()
+ defer m.inner.mu.RUnlock()
+
+ ep, ok := m.inner.mu.endpoints[addr]
+ if !ok {
+ return nil
+ }
+ return ep
+}
+
+// ForEach calls f for each address pair.
+//
+// If f returns false, f is no longer be called.
+func (m ReadOnlyAddressableEndpointState) ForEach(f func(AddressEndpoint) bool) {
+ m.inner.mu.RLock()
+ defer m.inner.mu.RUnlock()
+
+ for _, ep := range m.inner.mu.endpoints {
+ if !f(ep) {
+ return
+ }
+ }
+}
+
+// ForEachPrimaryEndpoint calls f for each primary address.
+//
+// If f returns false, f is no longer be called.
+func (m ReadOnlyAddressableEndpointState) ForEachPrimaryEndpoint(f func(AddressEndpoint)) {
+ m.inner.mu.RLock()
+ defer m.inner.mu.RUnlock()
+ for _, ep := range m.inner.mu.primary {
+ f(ep)
+ }
+}
+
+// ReadOnly returns a readonly reference to a.
+func (a *AddressableEndpointState) ReadOnly() ReadOnlyAddressableEndpointState {
+ return ReadOnlyAddressableEndpointState{inner: a}
+}
+
+func (a *AddressableEndpointState) releaseAddressState(addrState *addressState) {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ a.releaseAddressStateLocked(addrState)
+}
+
+// releaseAddressState removes addrState from s's address state (primary and endpoints list).
+//
+// Preconditions: a.mu must be write locked.
+func (a *AddressableEndpointState) releaseAddressStateLocked(addrState *addressState) {
+ oldPrimary := a.mu.primary
+ for i, s := range a.mu.primary {
+ if s == addrState {
+ a.mu.primary = append(a.mu.primary[:i], a.mu.primary[i+1:]...)
+ oldPrimary[len(oldPrimary)-1] = nil
+ break
+ }
+ }
+ delete(a.mu.endpoints, addrState.addr.Address)
+}
+
+// AddAndAcquirePermanentAddress implements AddressableEndpoint.
+func (a *AddressableEndpointState) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated bool) (AddressEndpoint, *tcpip.Error) {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ ep, err := a.addAndAcquireAddressLocked(addr, peb, configType, deprecated, true /* permanent */)
+ // From https://golang.org/doc/faq#nil_error:
+ //
+ // Under the covers, interfaces are implemented as two elements, a type T and
+ // a value V.
+ //
+ // An interface value is nil only if the V and T are both unset, (T=nil, V is
+ // not set), In particular, a nil interface will always hold a nil type. If we
+ // store a nil pointer of type *int inside an interface value, the inner type
+ // will be *int regardless of the value of the pointer: (T=*int, V=nil). Such
+ // an interface value will therefore be non-nil even when the pointer value V
+ // inside is nil.
+ //
+ // Since addAndAcquireAddressLocked returns a nil value with a non-nil type,
+ // we need to explicitly return nil below if ep is (a typed) nil.
+ if ep == nil {
+ return nil, err
+ }
+ return ep, err
+}
+
+// AddAndAcquireTemporaryAddress adds a temporary address.
+//
+// Returns tcpip.ErrDuplicateAddress if the address exists.
+//
+// The temporary address's endpoint is acquired and returned.
+func (a *AddressableEndpointState) AddAndAcquireTemporaryAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior) (AddressEndpoint, *tcpip.Error) {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ ep, err := a.addAndAcquireAddressLocked(addr, peb, AddressConfigStatic, false /* deprecated */, false /* permanent */)
+ // From https://golang.org/doc/faq#nil_error:
+ //
+ // Under the covers, interfaces are implemented as two elements, a type T and
+ // a value V.
+ //
+ // An interface value is nil only if the V and T are both unset, (T=nil, V is
+ // not set), In particular, a nil interface will always hold a nil type. If we
+ // store a nil pointer of type *int inside an interface value, the inner type
+ // will be *int regardless of the value of the pointer: (T=*int, V=nil). Such
+ // an interface value will therefore be non-nil even when the pointer value V
+ // inside is nil.
+ //
+ // Since addAndAcquireAddressLocked returns a nil value with a non-nil type,
+ // we need to explicitly return nil below if ep is (a typed) nil.
+ if ep == nil {
+ return nil, err
+ }
+ return ep, err
+}
+
+// addAndAcquireAddressLocked adds, acquires and returns a permanent or
+// temporary address.
+//
+// If the addressable endpoint already has the address in a non-permanent state,
+// and addAndAcquireAddressLocked is adding a permanent address, that address is
+// promoted in place and its properties set to the properties provided. If the
+// address already exists in any other state, then tcpip.ErrDuplicateAddress is
+// returned, regardless the kind of address that is being added.
+//
+// Precondition: a.mu must be write locked.
+func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated, permanent bool) (*addressState, *tcpip.Error) {
+ // attemptAddToPrimary is false when the address is already in the primary
+ // address list.
+ attemptAddToPrimary := true
+ addrState, ok := a.mu.endpoints[addr.Address]
+ if ok {
+ if !permanent {
+ // We are adding a non-permanent address but the address exists. No need
+ // to go any further since we can only promote existing temporary/expired
+ // addresses to permanent.
+ return nil, tcpip.ErrDuplicateAddress
+ }
+
+ addrState.mu.Lock()
+ if addrState.mu.kind.IsPermanent() {
+ addrState.mu.Unlock()
+ // We are adding a permanent address but a permanent address already
+ // exists.
+ return nil, tcpip.ErrDuplicateAddress
+ }
+
+ if addrState.mu.refs == 0 {
+ panic(fmt.Sprintf("found an address that should have been released (ref count == 0); address = %s", addrState.addr))
+ }
+
+ // We now promote the address.
+ for i, s := range a.mu.primary {
+ if s == addrState {
+ switch peb {
+ case CanBePrimaryEndpoint:
+ // The address is already in the primary address list.
+ attemptAddToPrimary = false
+ case FirstPrimaryEndpoint:
+ if i == 0 {
+ // The address is already first in the primary address list.
+ attemptAddToPrimary = false
+ } else {
+ a.mu.primary = append(a.mu.primary[:i], a.mu.primary[i+1:]...)
+ }
+ case NeverPrimaryEndpoint:
+ a.mu.primary = append(a.mu.primary[:i], a.mu.primary[i+1:]...)
+ default:
+ panic(fmt.Sprintf("unrecognized primary endpoint behaviour = %d", peb))
+ }
+ break
+ }
+ }
+ }
+
+ if addrState == nil {
+ addrState = &addressState{
+ addressableEndpointState: a,
+ addr: addr,
+ }
+ a.mu.endpoints[addr.Address] = addrState
+ addrState.mu.Lock()
+ // We never promote an address to temporary - it can only be added as such.
+ // If we are actaully adding a permanent address, it is promoted below.
+ addrState.mu.kind = Temporary
+ }
+
+ // At this point we have an address we are either promoting from an expired or
+ // temporary address to permanent, promoting an expired address to temporary,
+ // or we are adding a new temporary or permanent address.
+ //
+ // The address MUST be write locked at this point.
+ defer addrState.mu.Unlock()
+
+ if permanent {
+ if addrState.mu.kind.IsPermanent() {
+ panic(fmt.Sprintf("only non-permanent addresses should be promoted to permanent; address = %s", addrState.addr))
+ }
+
+ // Primary addresses are biased by 1.
+ addrState.mu.refs++
+ addrState.mu.kind = Permanent
+ }
+ // Acquire the address before returning it.
+ addrState.mu.refs++
+ addrState.mu.deprecated = deprecated
+ addrState.mu.configType = configType
+
+ if attemptAddToPrimary {
+ switch peb {
+ case NeverPrimaryEndpoint:
+ case CanBePrimaryEndpoint:
+ a.mu.primary = append(a.mu.primary, addrState)
+ case FirstPrimaryEndpoint:
+ if cap(a.mu.primary) == len(a.mu.primary) {
+ a.mu.primary = append([]*addressState{addrState}, a.mu.primary...)
+ } else {
+ // Shift all the endpoints by 1 to make room for the new address at the
+ // front. We could have just created a new slice but this saves
+ // allocations when the slice has capacity for the new address.
+ primaryCount := len(a.mu.primary)
+ a.mu.primary = append(a.mu.primary, nil)
+ if n := copy(a.mu.primary[1:], a.mu.primary); n != primaryCount {
+ panic(fmt.Sprintf("copied %d elements; expected = %d elements", n, primaryCount))
+ }
+ a.mu.primary[0] = addrState
+ }
+ default:
+ panic(fmt.Sprintf("unrecognized primary endpoint behaviour = %d", peb))
+ }
+ }
+
+ return addrState, nil
+}
+
+// RemovePermanentAddress implements AddressableEndpoint.
+func (a *AddressableEndpointState) RemovePermanentAddress(addr tcpip.Address) *tcpip.Error {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+
+ if _, ok := a.mu.groups[addr]; ok {
+ panic(fmt.Sprintf("group address = %s must be removed with LeaveGroup", addr))
+ }
+
+ return a.removePermanentAddressLocked(addr)
+}
+
+// removePermanentAddressLocked is like RemovePermanentAddress but with locking
+// requirements.
+//
+// Precondition: a.mu must be write locked.
+func (a *AddressableEndpointState) removePermanentAddressLocked(addr tcpip.Address) *tcpip.Error {
+ addrState, ok := a.mu.endpoints[addr]
+ if !ok {
+ return tcpip.ErrBadLocalAddress
+ }
+
+ return a.removePermanentEndpointLocked(addrState)
+}
+
+// RemovePermanentEndpoint removes the passed endpoint if it is associated with
+// a and permanent.
+func (a *AddressableEndpointState) RemovePermanentEndpoint(ep AddressEndpoint) *tcpip.Error {
+ addrState, ok := ep.(*addressState)
+ if !ok || addrState.addressableEndpointState != a {
+ return tcpip.ErrInvalidEndpointState
+ }
+
+ return a.removePermanentEndpointLocked(addrState)
+}
+
+// removePermanentAddressLocked is like RemovePermanentAddress but with locking
+// requirements.
+//
+// Precondition: a.mu must be write locked.
+func (a *AddressableEndpointState) removePermanentEndpointLocked(addrState *addressState) *tcpip.Error {
+ if !addrState.GetKind().IsPermanent() {
+ return tcpip.ErrBadLocalAddress
+ }
+
+ addrState.SetKind(PermanentExpired)
+ a.decAddressRefLocked(addrState)
+ return nil
+}
+
+// decAddressRef decrements the address's reference count and releases it once
+// the reference count hits 0.
+func (a *AddressableEndpointState) decAddressRef(addrState *addressState) {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ a.decAddressRefLocked(addrState)
+}
+
+// decAddressRefLocked is like decAddressRef but with locking requirements.
+//
+// Precondition: a.mu must be write locked.
+func (a *AddressableEndpointState) decAddressRefLocked(addrState *addressState) {
+ addrState.mu.Lock()
+ defer addrState.mu.Unlock()
+
+ if addrState.mu.refs == 0 {
+ panic(fmt.Sprintf("attempted to decrease ref count for AddressEndpoint w/ addr = %s when it is already released", addrState.addr))
+ }
+
+ addrState.mu.refs--
+
+ if addrState.mu.refs != 0 {
+ return
+ }
+
+ // A non-expired permanent address must not have its reference count dropped
+ // to 0.
+ if addrState.mu.kind.IsPermanent() {
+ panic(fmt.Sprintf("permanent addresses should be removed through the AddressableEndpoint: addr = %s, kind = %d", addrState.addr, addrState.mu.kind))
+ }
+
+ a.releaseAddressStateLocked(addrState)
+}
+
+// AcquireAssignedAddress implements AddressableEndpoint.
+func (a *AddressableEndpointState) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB PrimaryEndpointBehavior) AddressEndpoint {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+
+ if addrState, ok := a.mu.endpoints[localAddr]; ok {
+ if !addrState.IsAssigned(allowTemp) {
+ return nil
+ }
+
+ if !addrState.IncRef() {
+ panic(fmt.Sprintf("failed to increase the reference count for address = %s", addrState.addr))
+ }
+
+ return addrState
+ }
+
+ if !allowTemp {
+ return nil
+ }
+
+ addr := localAddr.WithPrefix()
+ ep, err := a.addAndAcquireAddressLocked(addr, tempPEB, AddressConfigStatic, false /* deprecated */, false /* permanent */)
+ if err != nil {
+ // addAndAcquireAddressLocked only returns an error if the address is
+ // already assigned but we just checked above if the address exists so we
+ // expect no error.
+ panic(fmt.Sprintf("a.addAndAcquireAddressLocked(%s, %d, %d, false, false): %s", addr, tempPEB, AddressConfigStatic, err))
+ }
+ // From https://golang.org/doc/faq#nil_error:
+ //
+ // Under the covers, interfaces are implemented as two elements, a type T and
+ // a value V.
+ //
+ // An interface value is nil only if the V and T are both unset, (T=nil, V is
+ // not set), In particular, a nil interface will always hold a nil type. If we
+ // store a nil pointer of type *int inside an interface value, the inner type
+ // will be *int regardless of the value of the pointer: (T=*int, V=nil). Such
+ // an interface value will therefore be non-nil even when the pointer value V
+ // inside is nil.
+ //
+ // Since addAndAcquireAddressLocked returns a nil value with a non-nil type,
+ // we need to explicitly return nil below if ep is (a typed) nil.
+ if ep == nil {
+ return nil
+ }
+ return ep
+}
+
+// AcquirePrimaryAddress implements AddressableEndpoint.
+func (a *AddressableEndpointState) AcquirePrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) AddressEndpoint {
+ a.mu.RLock()
+ defer a.mu.RUnlock()
+
+ var deprecatedEndpoint *addressState
+ for _, ep := range a.mu.primary {
+ if !ep.IsAssigned(allowExpired) {
+ continue
+ }
+
+ if !ep.Deprecated() {
+ if ep.IncRef() {
+ // ep is not deprecated, so return it immediately.
+ //
+ // If we kept track of a deprecated endpoint, decrement its reference
+ // count since it was incremented when we decided to keep track of it.
+ if deprecatedEndpoint != nil {
+ a.decAddressRefLocked(deprecatedEndpoint)
+ deprecatedEndpoint = nil
+ }
+
+ return ep
+ }
+ } else if deprecatedEndpoint == nil && ep.IncRef() {
+ // We prefer an endpoint that is not deprecated, but we keep track of
+ // ep in case a doesn't have any non-deprecated endpoints.
+ //
+ // If we end up finding a more preferred endpoint, ep's reference count
+ // will be decremented.
+ deprecatedEndpoint = ep
+ }
+ }
+
+ // a doesn't have any valid non-deprecated endpoints, so return
+ // deprecatedEndpoint (which may be nil if a doesn't have any valid deprecated
+ // endpoints either).
+ if deprecatedEndpoint == nil {
+ return nil
+ }
+ return deprecatedEndpoint
+}
+
+// PrimaryAddresses implements AddressableEndpoint.
+func (a *AddressableEndpointState) PrimaryAddresses() []tcpip.AddressWithPrefix {
+ a.mu.RLock()
+ defer a.mu.RUnlock()
+
+ var addrs []tcpip.AddressWithPrefix
+ for _, ep := range a.mu.primary {
+ // Don't include tentative, expired or temporary endpoints
+ // to avoid confusion and prevent the caller from using
+ // those.
+ switch ep.GetKind() {
+ case PermanentTentative, PermanentExpired, Temporary:
+ continue
+ }
+
+ addrs = append(addrs, ep.AddressWithPrefix())
+ }
+
+ return addrs
+}
+
+// PermanentAddresses implements AddressableEndpoint.
+func (a *AddressableEndpointState) PermanentAddresses() []tcpip.AddressWithPrefix {
+ a.mu.RLock()
+ defer a.mu.RUnlock()
+
+ var addrs []tcpip.AddressWithPrefix
+ for _, ep := range a.mu.endpoints {
+ if !ep.GetKind().IsPermanent() {
+ continue
+ }
+
+ addrs = append(addrs, ep.AddressWithPrefix())
+ }
+
+ return addrs
+}
+
+// JoinGroup implements GroupAddressableEndpoint.
+func (a *AddressableEndpointState) JoinGroup(group tcpip.Address) (bool, *tcpip.Error) {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+
+ joins, ok := a.mu.groups[group]
+ if !ok {
+ ep, err := a.addAndAcquireAddressLocked(group.WithPrefix(), NeverPrimaryEndpoint, AddressConfigStatic, false /* deprecated */, true /* permanent */)
+ if err != nil {
+ return false, err
+ }
+ // We have no need for the address endpoint.
+ a.decAddressRefLocked(ep)
+ }
+
+ a.mu.groups[group] = joins + 1
+ return !ok, nil
+}
+
+// LeaveGroup implements GroupAddressableEndpoint.
+func (a *AddressableEndpointState) LeaveGroup(group tcpip.Address) (bool, *tcpip.Error) {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+
+ joins, ok := a.mu.groups[group]
+ if !ok {
+ return false, tcpip.ErrBadLocalAddress
+ }
+
+ if joins == 1 {
+ a.removeGroupAddressLocked(group)
+ delete(a.mu.groups, group)
+ return true, nil
+ }
+
+ a.mu.groups[group] = joins - 1
+ return false, nil
+}
+
+// IsInGroup implements GroupAddressableEndpoint.
+func (a *AddressableEndpointState) IsInGroup(group tcpip.Address) bool {
+ a.mu.RLock()
+ defer a.mu.RUnlock()
+ _, ok := a.mu.groups[group]
+ return ok
+}
+
+func (a *AddressableEndpointState) removeGroupAddressLocked(group tcpip.Address) {
+ if err := a.removePermanentAddressLocked(group); err != nil {
+ // removePermanentEndpointLocked would only return an error if group is
+ // not bound to the addressable endpoint, but we know it MUST be assigned
+ // since we have group in our map of groups.
+ panic(fmt.Sprintf("error removing group address = %s: %s", group, err))
+ }
+}
+
+// Cleanup forcefully leaves all groups and removes all permanent addresses.
+func (a *AddressableEndpointState) Cleanup() {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+
+ for group := range a.mu.groups {
+ a.removeGroupAddressLocked(group)
+ }
+ a.mu.groups = make(map[tcpip.Address]uint32)
+
+ for _, ep := range a.mu.endpoints {
+ // removePermanentEndpointLocked returns tcpip.ErrBadLocalAddress if ep is
+ // not a permanent address.
+ if err := a.removePermanentEndpointLocked(ep); err != nil && err != tcpip.ErrBadLocalAddress {
+ panic(fmt.Sprintf("unexpected error from removePermanentEndpointLocked(%s): %s", ep.addr, err))
+ }
+ }
+}
+
+var _ AddressEndpoint = (*addressState)(nil)
+
+// addressState holds state for an address.
+type addressState struct {
+ addressableEndpointState *AddressableEndpointState
+ addr tcpip.AddressWithPrefix
+
+ // Lock ordering (from outer to inner lock ordering):
+ //
+ // AddressableEndpointState.mu
+ // addressState.mu
+ mu struct {
+ sync.RWMutex
+
+ refs uint32
+ kind AddressKind
+ configType AddressConfigType
+ deprecated bool
+ }
+}
+
+// NetworkEndpoint implements AddressEndpoint.
+func (a *addressState) NetworkEndpoint() NetworkEndpoint {
+ return a.addressableEndpointState.networkEndpoint
+}
+
+// AddressWithPrefix implements AddressEndpoint.
+func (a *addressState) AddressWithPrefix() tcpip.AddressWithPrefix {
+ return a.addr
+}
+
+// GetKind implements AddressEndpoint.
+func (a *addressState) GetKind() AddressKind {
+ a.mu.RLock()
+ defer a.mu.RUnlock()
+ return a.mu.kind
+}
+
+// SetKind implements AddressEndpoint.
+func (a *addressState) SetKind(kind AddressKind) {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ a.mu.kind = kind
+}
+
+// IsAssigned implements AddressEndpoint.
+func (a *addressState) IsAssigned(allowExpired bool) bool {
+ if !a.addressableEndpointState.networkEndpoint.Enabled() {
+ return false
+ }
+
+ switch a.GetKind() {
+ case PermanentTentative:
+ return false
+ case PermanentExpired:
+ return allowExpired
+ default:
+ return true
+ }
+}
+
+// IncRef implements AddressEndpoint.
+func (a *addressState) IncRef() bool {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ if a.mu.refs == 0 {
+ return false
+ }
+
+ a.mu.refs++
+ return true
+}
+
+// DecRef implements AddressEndpoint.
+func (a *addressState) DecRef() {
+ a.addressableEndpointState.decAddressRef(a)
+}
+
+// ConfigType implements AddressEndpoint.
+func (a *addressState) ConfigType() AddressConfigType {
+ a.mu.RLock()
+ defer a.mu.RUnlock()
+ return a.mu.configType
+}
+
+// SetDeprecated implements AddressEndpoint.
+func (a *addressState) SetDeprecated(d bool) {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ a.mu.deprecated = d
+}
+
+// Deprecated implements AddressEndpoint.
+func (a *addressState) Deprecated() bool {
+ a.mu.RLock()
+ defer a.mu.RUnlock()
+ return a.mu.deprecated
+}
diff --git a/pkg/tcpip/stack/addressable_endpoint_state_test.go b/pkg/tcpip/stack/addressable_endpoint_state_test.go
new file mode 100644
index 000000000..de4e0d7b1
--- /dev/null
+++ b/pkg/tcpip/stack/addressable_endpoint_state_test.go
@@ -0,0 +1,72 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stack_test
+
+import (
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+)
+
+// TestAddressableEndpointStateCleanup tests that cleaning up an addressable
+// endpoint state removes permanent addresses and leaves groups.
+func TestAddressableEndpointStateCleanup(t *testing.T) {
+ var s stack.AddressableEndpointState
+ s.Init(&fakeNetworkEndpoint{})
+
+ addr := tcpip.AddressWithPrefix{
+ Address: "\x01",
+ PrefixLen: 8,
+ }
+
+ {
+ ep, err := s.AddAndAcquirePermanentAddress(addr, stack.NeverPrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */)
+ if err != nil {
+ t.Fatalf("s.AddAndAcquirePermanentAddress(%s, %d, %d, false): %s", addr, stack.NeverPrimaryEndpoint, stack.AddressConfigStatic, err)
+ }
+ // We don't need the address endpoint.
+ ep.DecRef()
+ }
+ {
+ ep := s.AcquireAssignedAddress(addr.Address, false /* allowTemp */, stack.NeverPrimaryEndpoint)
+ if ep == nil {
+ t.Fatalf("got s.AcquireAssignedAddress(%s) = nil, want = non-nil", addr.Address)
+ }
+ ep.DecRef()
+ }
+
+ group := tcpip.Address("\x02")
+ if added, err := s.JoinGroup(group); err != nil {
+ t.Fatalf("s.JoinGroup(%s): %s", group, err)
+ } else if !added {
+ t.Fatalf("got s.JoinGroup(%s) = false, want = true", group)
+ }
+ if !s.IsInGroup(group) {
+ t.Fatalf("got s.IsInGroup(%s) = false, want = true", group)
+ }
+
+ s.Cleanup()
+ {
+ ep := s.AcquireAssignedAddress(addr.Address, false /* allowTemp */, stack.NeverPrimaryEndpoint)
+ if ep != nil {
+ ep.DecRef()
+ t.Fatalf("got s.AcquireAssignedAddress(%s) = %s, want = nil", addr.Address, ep.AddressWithPrefix())
+ }
+ }
+ if s.IsInGroup(group) {
+ t.Fatalf("got s.IsInGroup(%s) = true, want = false", group)
+ }
+}
diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go
index e30927821..4e4b00a92 100644
--- a/pkg/tcpip/stack/forwarder_test.go
+++ b/pkg/tcpip/stack/forwarder_test.go
@@ -20,6 +20,7 @@ import (
"testing"
"time"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -45,6 +46,8 @@ const (
// use the first three: destination address, source address, and transport
// protocol. They're all one byte fields to simplify parsing.
type fwdTestNetworkEndpoint struct {
+ AddressableEndpointState
+
nicID tcpip.NICID
proto *fwdTestNetworkProtocol
dispatcher TransportDispatcher
@@ -53,12 +56,18 @@ type fwdTestNetworkEndpoint struct {
var _ NetworkEndpoint = (*fwdTestNetworkEndpoint)(nil)
-func (f *fwdTestNetworkEndpoint) MTU() uint32 {
- return f.ep.MTU() - uint32(f.MaxHeaderLength())
+func (*fwdTestNetworkEndpoint) Enable() *tcpip.Error {
+ return nil
}
-func (f *fwdTestNetworkEndpoint) NICID() tcpip.NICID {
- return f.nicID
+func (*fwdTestNetworkEndpoint) Enabled() bool {
+ return true
+}
+
+func (*fwdTestNetworkEndpoint) Disable() {}
+
+func (f *fwdTestNetworkEndpoint) MTU() uint32 {
+ return f.ep.MTU() - uint32(f.MaxHeaderLength())
}
func (*fwdTestNetworkEndpoint) DefaultTTL() uint8 {
@@ -78,10 +87,6 @@ func (f *fwdTestNetworkEndpoint) PseudoHeaderChecksum(protocol tcpip.TransportPr
return 0
}
-func (f *fwdTestNetworkEndpoint) Capabilities() LinkEndpointCapabilities {
- return f.ep.Capabilities()
-}
-
func (f *fwdTestNetworkEndpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
return f.proto.Number()
}
@@ -106,7 +111,9 @@ func (*fwdTestNetworkEndpoint) WriteHeaderIncludedPacket(r *Route, pkt *PacketBu
return tcpip.ErrNotSupported
}
-func (*fwdTestNetworkEndpoint) Close() {}
+func (f *fwdTestNetworkEndpoint) Close() {
+ f.AddressableEndpointState.Cleanup()
+}
// fwdTestNetworkProtocol is a network-layer protocol that implements Address
// resolution.
@@ -116,6 +123,11 @@ type fwdTestNetworkProtocol struct {
addrResolveDelay time.Duration
onLinkAddressResolved func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress)
onResolveStaticAddress func(tcpip.Address) (tcpip.LinkAddress, bool)
+
+ mu struct {
+ sync.RWMutex
+ forwarding bool
+ }
}
var _ NetworkProtocol = (*fwdTestNetworkProtocol)(nil)
@@ -145,17 +157,15 @@ func (*fwdTestNetworkProtocol) Parse(pkt *PacketBuffer) (tcpip.TransportProtocol
return tcpip.TransportProtocolNumber(netHeader[protocolNumberOffset]), true, true
}
-func (*fwdTestNetworkProtocol) ReturnError(*Route, tcpip.ICMPReason, *PacketBuffer) *tcpip.Error {
- return nil
-}
-
-func (f *fwdTestNetworkProtocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCache, _ NUDHandler, dispatcher TransportDispatcher, ep LinkEndpoint, _ *Stack) NetworkEndpoint {
- return &fwdTestNetworkEndpoint{
- nicID: nicID,
+func (f *fwdTestNetworkProtocol) NewEndpoint(nic NetworkInterface, _ LinkAddressCache, _ NUDHandler, dispatcher TransportDispatcher) NetworkEndpoint {
+ e := &fwdTestNetworkEndpoint{
+ nicID: nic.ID(),
proto: f,
dispatcher: dispatcher,
- ep: ep,
+ ep: nic.LinkEndpoint(),
}
+ e.AddressableEndpointState.Init(e)
+ return e
}
func (*fwdTestNetworkProtocol) SetOption(tcpip.SettableNetworkProtocolOption) *tcpip.Error {
@@ -190,6 +200,21 @@ func (*fwdTestNetworkProtocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber
return fwdTestNetNumber
}
+// Forwarding implements stack.ForwardingNetworkProtocol.
+func (f *fwdTestNetworkProtocol) Forwarding() bool {
+ f.mu.RLock()
+ defer f.mu.RUnlock()
+ return f.mu.forwarding
+
+}
+
+// SetForwarding implements stack.ForwardingNetworkProtocol.
+func (f *fwdTestNetworkProtocol) SetForwarding(v bool) {
+ f.mu.Lock()
+ defer f.mu.Unlock()
+ f.mu.forwarding = v
+}
+
// fwdTestPacketInfo holds all the information about an outbound packet.
type fwdTestPacketInfo struct {
RemoteLinkAddress tcpip.LinkAddress
@@ -311,7 +336,7 @@ func (e *fwdTestLinkEndpoint) AddHeader(local, remote tcpip.LinkAddress, protoco
func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol, useNeighborCache bool) (ep1, ep2 *fwdTestLinkEndpoint) {
// Create a stack with the network protocol and two NICs.
s := New(Options{
- NetworkProtocols: []NetworkProtocol{proto},
+ NetworkProtocols: []NetworkProtocolFactory{func(*Stack) NetworkProtocol { return proto }},
UseNeighborCache: useNeighborCache,
})
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index 5e43a9b0b..73a01c2dd 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -150,10 +150,10 @@ type ndpDNSSLEvent struct {
type ndpDHCPv6Event struct {
nicID tcpip.NICID
- configuration stack.DHCPv6ConfigurationFromNDPRA
+ configuration ipv6.DHCPv6ConfigurationFromNDPRA
}
-var _ stack.NDPDispatcher = (*ndpDispatcher)(nil)
+var _ ipv6.NDPDispatcher = (*ndpDispatcher)(nil)
// ndpDispatcher implements NDPDispatcher so tests can know when various NDP
// related events happen for test purposes.
@@ -170,7 +170,7 @@ type ndpDispatcher struct {
dhcpv6ConfigurationC chan ndpDHCPv6Event
}
-// Implements stack.NDPDispatcher.OnDuplicateAddressDetectionStatus.
+// Implements ipv6.NDPDispatcher.OnDuplicateAddressDetectionStatus.
func (n *ndpDispatcher) OnDuplicateAddressDetectionStatus(nicID tcpip.NICID, addr tcpip.Address, resolved bool, err *tcpip.Error) {
if n.dadC != nil {
n.dadC <- ndpDADEvent{
@@ -182,7 +182,7 @@ func (n *ndpDispatcher) OnDuplicateAddressDetectionStatus(nicID tcpip.NICID, add
}
}
-// Implements stack.NDPDispatcher.OnDefaultRouterDiscovered.
+// Implements ipv6.NDPDispatcher.OnDefaultRouterDiscovered.
func (n *ndpDispatcher) OnDefaultRouterDiscovered(nicID tcpip.NICID, addr tcpip.Address) bool {
if c := n.routerC; c != nil {
c <- ndpRouterEvent{
@@ -195,7 +195,7 @@ func (n *ndpDispatcher) OnDefaultRouterDiscovered(nicID tcpip.NICID, addr tcpip.
return n.rememberRouter
}
-// Implements stack.NDPDispatcher.OnDefaultRouterInvalidated.
+// Implements ipv6.NDPDispatcher.OnDefaultRouterInvalidated.
func (n *ndpDispatcher) OnDefaultRouterInvalidated(nicID tcpip.NICID, addr tcpip.Address) {
if c := n.routerC; c != nil {
c <- ndpRouterEvent{
@@ -206,7 +206,7 @@ func (n *ndpDispatcher) OnDefaultRouterInvalidated(nicID tcpip.NICID, addr tcpip
}
}
-// Implements stack.NDPDispatcher.OnOnLinkPrefixDiscovered.
+// Implements ipv6.NDPDispatcher.OnOnLinkPrefixDiscovered.
func (n *ndpDispatcher) OnOnLinkPrefixDiscovered(nicID tcpip.NICID, prefix tcpip.Subnet) bool {
if c := n.prefixC; c != nil {
c <- ndpPrefixEvent{
@@ -219,7 +219,7 @@ func (n *ndpDispatcher) OnOnLinkPrefixDiscovered(nicID tcpip.NICID, prefix tcpip
return n.rememberPrefix
}
-// Implements stack.NDPDispatcher.OnOnLinkPrefixInvalidated.
+// Implements ipv6.NDPDispatcher.OnOnLinkPrefixInvalidated.
func (n *ndpDispatcher) OnOnLinkPrefixInvalidated(nicID tcpip.NICID, prefix tcpip.Subnet) {
if c := n.prefixC; c != nil {
c <- ndpPrefixEvent{
@@ -261,7 +261,7 @@ func (n *ndpDispatcher) OnAutoGenAddressInvalidated(nicID tcpip.NICID, addr tcpi
}
}
-// Implements stack.NDPDispatcher.OnRecursiveDNSServerOption.
+// Implements ipv6.NDPDispatcher.OnRecursiveDNSServerOption.
func (n *ndpDispatcher) OnRecursiveDNSServerOption(nicID tcpip.NICID, addrs []tcpip.Address, lifetime time.Duration) {
if c := n.rdnssC; c != nil {
c <- ndpRDNSSEvent{
@@ -274,7 +274,7 @@ func (n *ndpDispatcher) OnRecursiveDNSServerOption(nicID tcpip.NICID, addrs []tc
}
}
-// Implements stack.NDPDispatcher.OnDNSSearchListOption.
+// Implements ipv6.NDPDispatcher.OnDNSSearchListOption.
func (n *ndpDispatcher) OnDNSSearchListOption(nicID tcpip.NICID, domainNames []string, lifetime time.Duration) {
if n.dnsslC != nil {
n.dnsslC <- ndpDNSSLEvent{
@@ -285,8 +285,8 @@ func (n *ndpDispatcher) OnDNSSearchListOption(nicID tcpip.NICID, domainNames []s
}
}
-// Implements stack.NDPDispatcher.OnDHCPv6Configuration.
-func (n *ndpDispatcher) OnDHCPv6Configuration(nicID tcpip.NICID, configuration stack.DHCPv6ConfigurationFromNDPRA) {
+// Implements ipv6.NDPDispatcher.OnDHCPv6Configuration.
+func (n *ndpDispatcher) OnDHCPv6Configuration(nicID tcpip.NICID, configuration ipv6.DHCPv6ConfigurationFromNDPRA) {
if c := n.dhcpv6ConfigurationC; c != nil {
c <- ndpDHCPv6Event{
nicID,
@@ -319,13 +319,12 @@ func TestDADDisabled(t *testing.T) {
ndpDisp := ndpDispatcher{
dadC: make(chan ndpDADEvent, 1),
}
- opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPDisp: &ndpDisp,
- }
-
e := channel.New(0, 1280, linkAddr1)
- s := stack.New(opts)
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPDisp: &ndpDisp,
+ })},
+ })
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
@@ -413,19 +412,21 @@ func TestDADResolve(t *testing.T) {
ndpDisp := ndpDispatcher{
dadC: make(chan ndpDADEvent),
}
- opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPDisp: &ndpDisp,
- }
- opts.NDPConfigs.RetransmitTimer = test.retransTimer
- opts.NDPConfigs.DupAddrDetectTransmits = test.dupAddrDetectTransmits
e := channelLinkWithHeaderLength{
Endpoint: channel.New(int(test.dupAddrDetectTransmits), 1280, linkAddr1),
headerLength: test.linkHeaderLen,
}
e.Endpoint.LinkEPCapabilities |= stack.CapabilityResolutionRequired
- s := stack.New(opts)
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPDisp: &ndpDisp,
+ NDPConfigs: ipv6.NDPConfigurations{
+ RetransmitTimer: test.retransTimer,
+ DupAddrDetectTransmits: test.dupAddrDetectTransmits,
+ },
+ })},
+ })
if err := s.CreateNIC(nicID, &e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
@@ -558,6 +559,26 @@ func TestDADResolve(t *testing.T) {
}
}
+func rxNDPSolicit(e *channel.Endpoint, tgt tcpip.Address) {
+ hdr := buffer.NewPrependable(header.IPv6MinimumSize + header.ICMPv6NeighborSolicitMinimumSize)
+ pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborSolicitMinimumSize))
+ pkt.SetType(header.ICMPv6NeighborSolicit)
+ ns := header.NDPNeighborSolicit(pkt.NDPPayload())
+ ns.SetTargetAddress(tgt)
+ snmc := header.SolicitedNodeAddr(tgt)
+ pkt.SetChecksum(header.ICMPv6Checksum(pkt, header.IPv6Any, snmc, buffer.VectorisedView{}))
+ payloadLength := hdr.UsedLength()
+ ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
+ ip.Encode(&header.IPv6Fields{
+ PayloadLength: uint16(payloadLength),
+ NextHeader: uint8(icmp.ProtocolNumber6),
+ HopLimit: 255,
+ SrcAddr: header.IPv6Any,
+ DstAddr: snmc,
+ })
+ e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{Data: hdr.View().ToVectorisedView()}))
+}
+
// TestDADFail tests to make sure that the DAD process fails if another node is
// detected to be performing DAD on the same address (receive an NS message from
// a node doing DAD for the same address), or if another node is detected to own
@@ -567,39 +588,19 @@ func TestDADFail(t *testing.T) {
tests := []struct {
name string
- makeBuf func(tgt tcpip.Address) buffer.Prependable
+ rxPkt func(e *channel.Endpoint, tgt tcpip.Address)
getStat func(s tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter
}{
{
- "RxSolicit",
- func(tgt tcpip.Address) buffer.Prependable {
- hdr := buffer.NewPrependable(header.IPv6MinimumSize + header.ICMPv6NeighborSolicitMinimumSize)
- pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborSolicitMinimumSize))
- pkt.SetType(header.ICMPv6NeighborSolicit)
- ns := header.NDPNeighborSolicit(pkt.NDPPayload())
- ns.SetTargetAddress(tgt)
- snmc := header.SolicitedNodeAddr(tgt)
- pkt.SetChecksum(header.ICMPv6Checksum(pkt, header.IPv6Any, snmc, buffer.VectorisedView{}))
- payloadLength := hdr.UsedLength()
- ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
- ip.Encode(&header.IPv6Fields{
- PayloadLength: uint16(payloadLength),
- NextHeader: uint8(icmp.ProtocolNumber6),
- HopLimit: 255,
- SrcAddr: header.IPv6Any,
- DstAddr: snmc,
- })
-
- return hdr
-
- },
- func(s tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
+ name: "RxSolicit",
+ rxPkt: rxNDPSolicit,
+ getStat: func(s tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
return s.NeighborSolicit
},
},
{
- "RxAdvert",
- func(tgt tcpip.Address) buffer.Prependable {
+ name: "RxAdvert",
+ rxPkt: func(e *channel.Endpoint, tgt tcpip.Address) {
naSize := header.ICMPv6NeighborAdvertMinimumSize + header.NDPLinkLayerAddressSize
hdr := buffer.NewPrependable(header.IPv6MinimumSize + naSize)
pkt := header.ICMPv6(hdr.Prepend(naSize))
@@ -621,11 +622,9 @@ func TestDADFail(t *testing.T) {
SrcAddr: tgt,
DstAddr: header.IPv6AllNodesMulticastAddress,
})
-
- return hdr
-
+ e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{Data: hdr.View().ToVectorisedView()}))
},
- func(s tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
+ getStat: func(s tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
return s.NeighborAdvert
},
},
@@ -636,16 +635,16 @@ func TestDADFail(t *testing.T) {
ndpDisp := ndpDispatcher{
dadC: make(chan ndpDADEvent, 1),
}
- ndpConfigs := stack.DefaultNDPConfigurations()
- opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: ndpConfigs,
- NDPDisp: &ndpDisp,
- }
- opts.NDPConfigs.RetransmitTimer = time.Second * 2
+ ndpConfigs := ipv6.DefaultNDPConfigurations()
+ ndpConfigs.RetransmitTimer = time.Second * 2
e := channel.New(0, 1280, linkAddr1)
- s := stack.New(opts)
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPDisp: &ndpDisp,
+ NDPConfigs: ndpConfigs,
+ })},
+ })
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
@@ -664,13 +663,8 @@ func TestDADFail(t *testing.T) {
t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (%s, nil), want = (%s, nil)", nicID, header.IPv6ProtocolNumber, addr, want)
}
- // Receive a packet to simulate multiple nodes owning or
- // attempting to own the same address.
- hdr := test.makeBuf(addr1)
- pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- Data: hdr.View().ToVectorisedView(),
- })
- e.InjectInbound(header.IPv6ProtocolNumber, pkt)
+ // Receive a packet to simulate an address conflict.
+ test.rxPkt(e, addr1)
stat := test.getStat(s.Stats().ICMP.V6PacketsReceived)
if got := stat.Value(); got != 1 {
@@ -754,18 +748,19 @@ func TestDADStop(t *testing.T) {
ndpDisp := ndpDispatcher{
dadC: make(chan ndpDADEvent, 1),
}
- ndpConfigs := stack.NDPConfigurations{
+
+ ndpConfigs := ipv6.NDPConfigurations{
RetransmitTimer: time.Second,
DupAddrDetectTransmits: 2,
}
- opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPDisp: &ndpDisp,
- NDPConfigs: ndpConfigs,
- }
e := channel.New(0, 1280, linkAddr1)
- s := stack.New(opts)
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPDisp: &ndpDisp,
+ NDPConfigs: ndpConfigs,
+ })},
+ })
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
}
@@ -815,19 +810,6 @@ func TestDADStop(t *testing.T) {
}
}
-// TestSetNDPConfigurationFailsForBadNICID tests to make sure we get an error if
-// we attempt to update NDP configurations using an invalid NICID.
-func TestSetNDPConfigurationFailsForBadNICID(t *testing.T) {
- s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- })
-
- // No NIC with ID 1 yet.
- if got := s.SetNDPConfigurations(1, stack.NDPConfigurations{}); got != tcpip.ErrUnknownNICID {
- t.Fatalf("got s.SetNDPConfigurations = %v, want = %s", got, tcpip.ErrUnknownNICID)
- }
-}
-
// TestSetNDPConfigurations tests that we can update and use per-interface NDP
// configurations without affecting the default NDP configurations or other
// interfaces' configurations.
@@ -863,8 +845,9 @@ func TestSetNDPConfigurations(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPDisp: &ndpDisp,
+ })},
})
expectDADEvent := func(nicID tcpip.NICID, addr tcpip.Address) {
@@ -892,12 +875,15 @@ func TestSetNDPConfigurations(t *testing.T) {
}
// Update the NDP configurations on NIC(1) to use DAD.
- configs := stack.NDPConfigurations{
+ configs := ipv6.NDPConfigurations{
DupAddrDetectTransmits: test.dupAddrDetectTransmits,
RetransmitTimer: test.retransmitTimer,
}
- if err := s.SetNDPConfigurations(nicID1, configs); err != nil {
- t.Fatalf("got SetNDPConfigurations(%d, _) = %s", nicID1, err)
+ if ipv6Ep, err := s.GetNetworkEndpoint(nicID1, header.IPv6ProtocolNumber); err != nil {
+ t.Fatalf("s.GetNetworkEndpoint(%d, %d): %s", nicID1, header.IPv6ProtocolNumber, err)
+ } else {
+ ndpEP := ipv6Ep.(ipv6.NDPEndpoint)
+ ndpEP.SetNDPConfigurations(configs)
}
// Created after updating NIC(1)'s NDP configurations
@@ -1113,12 +1099,13 @@ func TestNoRouterDiscovery(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: handle,
- DiscoverDefaultRouters: discover,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: handle,
+ DiscoverDefaultRouters: discover,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
s.SetForwarding(ipv6.ProtocolNumber, forwarding)
@@ -1151,12 +1138,13 @@ func TestRouterDiscoveryDispatcherNoRemember(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- DiscoverDefaultRouters: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ DiscoverDefaultRouters: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
@@ -1192,12 +1180,13 @@ func TestRouterDiscovery(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- DiscoverDefaultRouters: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ DiscoverDefaultRouters: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
expectRouterEvent := func(addr tcpip.Address, discovered bool) {
@@ -1285,7 +1274,7 @@ func TestRouterDiscovery(t *testing.T) {
}
// TestRouterDiscoveryMaxRouters tests that only
-// stack.MaxDiscoveredDefaultRouters discovered routers are remembered.
+// ipv6.MaxDiscoveredDefaultRouters discovered routers are remembered.
func TestRouterDiscoveryMaxRouters(t *testing.T) {
ndpDisp := ndpDispatcher{
routerC: make(chan ndpRouterEvent, 1),
@@ -1293,12 +1282,13 @@ func TestRouterDiscoveryMaxRouters(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- DiscoverDefaultRouters: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ DiscoverDefaultRouters: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
@@ -1306,14 +1296,14 @@ func TestRouterDiscoveryMaxRouters(t *testing.T) {
}
// Receive an RA from 2 more than the max number of discovered routers.
- for i := 1; i <= stack.MaxDiscoveredDefaultRouters+2; i++ {
+ for i := 1; i <= ipv6.MaxDiscoveredDefaultRouters+2; i++ {
linkAddr := []byte{2, 2, 3, 4, 5, 0}
linkAddr[5] = byte(i)
llAddr := header.LinkLocalAddr(tcpip.LinkAddress(linkAddr))
e.InjectInbound(header.IPv6ProtocolNumber, raBuf(llAddr, 5))
- if i <= stack.MaxDiscoveredDefaultRouters {
+ if i <= ipv6.MaxDiscoveredDefaultRouters {
select {
case e := <-ndpDisp.routerC:
if diff := checkRouterEvent(e, llAddr, true); diff != "" {
@@ -1358,12 +1348,13 @@ func TestNoPrefixDiscovery(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: handle,
- DiscoverOnLinkPrefixes: discover,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: handle,
+ DiscoverOnLinkPrefixes: discover,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
s.SetForwarding(ipv6.ProtocolNumber, forwarding)
@@ -1399,13 +1390,14 @@ func TestPrefixDiscoveryDispatcherNoRemember(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- DiscoverDefaultRouters: false,
- DiscoverOnLinkPrefixes: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ DiscoverDefaultRouters: false,
+ DiscoverOnLinkPrefixes: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
@@ -1445,12 +1437,13 @@ func TestPrefixDiscovery(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- DiscoverOnLinkPrefixes: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ DiscoverOnLinkPrefixes: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
@@ -1545,12 +1538,13 @@ func TestPrefixDiscoveryWithInfiniteLifetime(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- DiscoverOnLinkPrefixes: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ DiscoverOnLinkPrefixes: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
@@ -1621,33 +1615,34 @@ func TestPrefixDiscoveryWithInfiniteLifetime(t *testing.T) {
}
// TestPrefixDiscoveryMaxRouters tests that only
-// stack.MaxDiscoveredOnLinkPrefixes discovered on-link prefixes are remembered.
+// ipv6.MaxDiscoveredOnLinkPrefixes discovered on-link prefixes are remembered.
func TestPrefixDiscoveryMaxOnLinkPrefixes(t *testing.T) {
ndpDisp := ndpDispatcher{
- prefixC: make(chan ndpPrefixEvent, stack.MaxDiscoveredOnLinkPrefixes+3),
+ prefixC: make(chan ndpPrefixEvent, ipv6.MaxDiscoveredOnLinkPrefixes+3),
rememberPrefix: true,
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- DiscoverDefaultRouters: false,
- DiscoverOnLinkPrefixes: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ DiscoverDefaultRouters: false,
+ DiscoverOnLinkPrefixes: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
t.Fatalf("CreateNIC(1) = %s", err)
}
- optSer := make(header.NDPOptionsSerializer, stack.MaxDiscoveredOnLinkPrefixes+2)
- prefixes := [stack.MaxDiscoveredOnLinkPrefixes + 2]tcpip.Subnet{}
+ optSer := make(header.NDPOptionsSerializer, ipv6.MaxDiscoveredOnLinkPrefixes+2)
+ prefixes := [ipv6.MaxDiscoveredOnLinkPrefixes + 2]tcpip.Subnet{}
// Receive an RA with 2 more than the max number of discovered on-link
// prefixes.
- for i := 0; i < stack.MaxDiscoveredOnLinkPrefixes+2; i++ {
+ for i := 0; i < ipv6.MaxDiscoveredOnLinkPrefixes+2; i++ {
prefixAddr := [16]byte{1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0}
prefixAddr[7] = byte(i)
prefix := tcpip.AddressWithPrefix{
@@ -1665,8 +1660,8 @@ func TestPrefixDiscoveryMaxOnLinkPrefixes(t *testing.T) {
}
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithOpts(llAddr1, 0, optSer))
- for i := 0; i < stack.MaxDiscoveredOnLinkPrefixes+2; i++ {
- if i < stack.MaxDiscoveredOnLinkPrefixes {
+ for i := 0; i < ipv6.MaxDiscoveredOnLinkPrefixes+2; i++ {
+ if i < ipv6.MaxDiscoveredOnLinkPrefixes {
select {
case e := <-ndpDisp.prefixC:
if diff := checkPrefixEvent(e, prefixes[i], true); diff != "" {
@@ -1716,12 +1711,13 @@ func TestNoAutoGenAddr(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: handle,
- AutoGenGlobalAddresses: autogen,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: handle,
+ AutoGenGlobalAddresses: autogen,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
s.SetForwarding(ipv6.ProtocolNumber, forwarding)
@@ -1749,14 +1745,14 @@ func checkAutoGenAddrEvent(e ndpAutoGenAddrEvent, addr tcpip.AddressWithPrefix,
// TestAutoGenAddr tests that an address is properly generated and invalidated
// when configured to do so.
-func TestAutoGenAddr(t *testing.T) {
+func TestAutoGenAddr2(t *testing.T) {
const newMinVL = 2
newMinVLDuration := newMinVL * time.Second
- saved := stack.MinPrefixInformationValidLifetimeForUpdate
+ saved := ipv6.MinPrefixInformationValidLifetimeForUpdate
defer func() {
- stack.MinPrefixInformationValidLifetimeForUpdate = saved
+ ipv6.MinPrefixInformationValidLifetimeForUpdate = saved
}()
- stack.MinPrefixInformationValidLifetimeForUpdate = newMinVLDuration
+ ipv6.MinPrefixInformationValidLifetimeForUpdate = newMinVLDuration
prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1)
prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1)
@@ -1766,12 +1762,13 @@ func TestAutoGenAddr(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
@@ -1876,14 +1873,14 @@ func TestAutoGenTempAddr(t *testing.T) {
newMinVLDuration = newMinVL * time.Second
)
- savedMinPrefixInformationValidLifetimeForUpdate := stack.MinPrefixInformationValidLifetimeForUpdate
- savedMaxDesync := stack.MaxDesyncFactor
+ savedMinPrefixInformationValidLifetimeForUpdate := ipv6.MinPrefixInformationValidLifetimeForUpdate
+ savedMaxDesync := ipv6.MaxDesyncFactor
defer func() {
- stack.MinPrefixInformationValidLifetimeForUpdate = savedMinPrefixInformationValidLifetimeForUpdate
- stack.MaxDesyncFactor = savedMaxDesync
+ ipv6.MinPrefixInformationValidLifetimeForUpdate = savedMinPrefixInformationValidLifetimeForUpdate
+ ipv6.MaxDesyncFactor = savedMaxDesync
}()
- stack.MinPrefixInformationValidLifetimeForUpdate = newMinVLDuration
- stack.MaxDesyncFactor = time.Nanosecond
+ ipv6.MinPrefixInformationValidLifetimeForUpdate = newMinVLDuration
+ ipv6.MaxDesyncFactor = time.Nanosecond
prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1)
prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1)
@@ -1931,16 +1928,17 @@ func TestAutoGenTempAddr(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- DupAddrDetectTransmits: test.dupAddrTransmits,
- RetransmitTimer: test.retransmitTimer,
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- AutoGenTempGlobalAddresses: true,
- },
- NDPDisp: &ndpDisp,
- TempIIDSeed: seed,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ DupAddrDetectTransmits: test.dupAddrTransmits,
+ RetransmitTimer: test.retransmitTimer,
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
+ AutoGenTempGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDisp,
+ TempIIDSeed: seed,
+ })},
})
if err := s.CreateNIC(nicID, e); err != nil {
@@ -2119,11 +2117,11 @@ func TestAutoGenTempAddr(t *testing.T) {
func TestNoAutoGenTempAddrForLinkLocal(t *testing.T) {
const nicID = 1
- savedMaxDesyncFactor := stack.MaxDesyncFactor
+ savedMaxDesyncFactor := ipv6.MaxDesyncFactor
defer func() {
- stack.MaxDesyncFactor = savedMaxDesyncFactor
+ ipv6.MaxDesyncFactor = savedMaxDesyncFactor
}()
- stack.MaxDesyncFactor = time.Nanosecond
+ ipv6.MaxDesyncFactor = time.Nanosecond
tests := []struct {
name string
@@ -2160,12 +2158,13 @@ func TestNoAutoGenTempAddrForLinkLocal(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- AutoGenTempGlobalAddresses: true,
- },
- NDPDisp: &ndpDisp,
- AutoGenIPv6LinkLocal: true,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ AutoGenTempGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDisp,
+ AutoGenIPv6LinkLocal: true,
+ })},
})
if err := s.CreateNIC(nicID, e); err != nil {
@@ -2211,11 +2210,11 @@ func TestNoAutoGenTempAddrWithoutStableAddr(t *testing.T) {
retransmitTimer = 2 * time.Second
)
- savedMaxDesyncFactor := stack.MaxDesyncFactor
+ savedMaxDesyncFactor := ipv6.MaxDesyncFactor
defer func() {
- stack.MaxDesyncFactor = savedMaxDesyncFactor
+ ipv6.MaxDesyncFactor = savedMaxDesyncFactor
}()
- stack.MaxDesyncFactor = 0
+ ipv6.MaxDesyncFactor = 0
prefix, _, addr := prefixSubnetAddr(0, linkAddr1)
var tempIIDHistory [header.IIDSize]byte
@@ -2228,15 +2227,16 @@ func TestNoAutoGenTempAddrWithoutStableAddr(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- DupAddrDetectTransmits: dadTransmits,
- RetransmitTimer: retransmitTimer,
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- AutoGenTempGlobalAddresses: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ DupAddrDetectTransmits: dadTransmits,
+ RetransmitTimer: retransmitTimer,
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
+ AutoGenTempGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(nicID, e); err != nil {
@@ -2294,17 +2294,17 @@ func TestAutoGenTempAddrRegen(t *testing.T) {
newMinVLDuration = newMinVL * time.Second
)
- savedMaxDesyncFactor := stack.MaxDesyncFactor
- savedMinMaxTempAddrPreferredLifetime := stack.MinMaxTempAddrPreferredLifetime
- savedMinMaxTempAddrValidLifetime := stack.MinMaxTempAddrValidLifetime
+ savedMaxDesyncFactor := ipv6.MaxDesyncFactor
+ savedMinMaxTempAddrPreferredLifetime := ipv6.MinMaxTempAddrPreferredLifetime
+ savedMinMaxTempAddrValidLifetime := ipv6.MinMaxTempAddrValidLifetime
defer func() {
- stack.MaxDesyncFactor = savedMaxDesyncFactor
- stack.MinMaxTempAddrPreferredLifetime = savedMinMaxTempAddrPreferredLifetime
- stack.MinMaxTempAddrValidLifetime = savedMinMaxTempAddrValidLifetime
+ ipv6.MaxDesyncFactor = savedMaxDesyncFactor
+ ipv6.MinMaxTempAddrPreferredLifetime = savedMinMaxTempAddrPreferredLifetime
+ ipv6.MinMaxTempAddrValidLifetime = savedMinMaxTempAddrValidLifetime
}()
- stack.MaxDesyncFactor = 0
- stack.MinMaxTempAddrPreferredLifetime = newMinVLDuration
- stack.MinMaxTempAddrValidLifetime = newMinVLDuration
+ ipv6.MaxDesyncFactor = 0
+ ipv6.MinMaxTempAddrPreferredLifetime = newMinVLDuration
+ ipv6.MinMaxTempAddrValidLifetime = newMinVLDuration
prefix, _, addr := prefixSubnetAddr(0, linkAddr1)
var tempIIDHistory [header.IIDSize]byte
@@ -2317,16 +2317,17 @@ func TestAutoGenTempAddrRegen(t *testing.T) {
autoGenAddrC: make(chan ndpAutoGenAddrEvent, 2),
}
e := channel.New(0, 1280, linkAddr1)
- ndpConfigs := stack.NDPConfigurations{
+ ndpConfigs := ipv6.NDPConfigurations{
HandleRAs: true,
AutoGenGlobalAddresses: true,
AutoGenTempGlobalAddresses: true,
RegenAdvanceDuration: newMinVLDuration - regenAfter,
}
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: ndpConfigs,
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ndpConfigs,
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(nicID, e); err != nil {
@@ -2382,8 +2383,11 @@ func TestAutoGenTempAddrRegen(t *testing.T) {
// Stop generating temporary addresses
ndpConfigs.AutoGenTempGlobalAddresses = false
- if err := s.SetNDPConfigurations(nicID, ndpConfigs); err != nil {
- t.Fatalf("s.SetNDPConfigurations(%d, _): %s", nicID, err)
+ if ipv6Ep, err := s.GetNetworkEndpoint(nicID, header.IPv6ProtocolNumber); err != nil {
+ t.Fatalf("s.GetNetworkEndpoint(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
+ } else {
+ ndpEP := ipv6Ep.(ipv6.NDPEndpoint)
+ ndpEP.SetNDPConfigurations(ndpConfigs)
}
// Wait for all the temporary addresses to get invalidated.
@@ -2439,17 +2443,17 @@ func TestAutoGenTempAddrRegenJobUpdates(t *testing.T) {
newMinVLDuration = newMinVL * time.Second
)
- savedMaxDesyncFactor := stack.MaxDesyncFactor
- savedMinMaxTempAddrPreferredLifetime := stack.MinMaxTempAddrPreferredLifetime
- savedMinMaxTempAddrValidLifetime := stack.MinMaxTempAddrValidLifetime
+ savedMaxDesyncFactor := ipv6.MaxDesyncFactor
+ savedMinMaxTempAddrPreferredLifetime := ipv6.MinMaxTempAddrPreferredLifetime
+ savedMinMaxTempAddrValidLifetime := ipv6.MinMaxTempAddrValidLifetime
defer func() {
- stack.MaxDesyncFactor = savedMaxDesyncFactor
- stack.MinMaxTempAddrPreferredLifetime = savedMinMaxTempAddrPreferredLifetime
- stack.MinMaxTempAddrValidLifetime = savedMinMaxTempAddrValidLifetime
+ ipv6.MaxDesyncFactor = savedMaxDesyncFactor
+ ipv6.MinMaxTempAddrPreferredLifetime = savedMinMaxTempAddrPreferredLifetime
+ ipv6.MinMaxTempAddrValidLifetime = savedMinMaxTempAddrValidLifetime
}()
- stack.MaxDesyncFactor = 0
- stack.MinMaxTempAddrPreferredLifetime = newMinVLDuration
- stack.MinMaxTempAddrValidLifetime = newMinVLDuration
+ ipv6.MaxDesyncFactor = 0
+ ipv6.MinMaxTempAddrPreferredLifetime = newMinVLDuration
+ ipv6.MinMaxTempAddrValidLifetime = newMinVLDuration
prefix, _, addr := prefixSubnetAddr(0, linkAddr1)
var tempIIDHistory [header.IIDSize]byte
@@ -2462,16 +2466,17 @@ func TestAutoGenTempAddrRegenJobUpdates(t *testing.T) {
autoGenAddrC: make(chan ndpAutoGenAddrEvent, 2),
}
e := channel.New(0, 1280, linkAddr1)
- ndpConfigs := stack.NDPConfigurations{
+ ndpConfigs := ipv6.NDPConfigurations{
HandleRAs: true,
AutoGenGlobalAddresses: true,
AutoGenTempGlobalAddresses: true,
RegenAdvanceDuration: newMinVLDuration - regenAfter,
}
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: ndpConfigs,
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ndpConfigs,
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(nicID, e); err != nil {
@@ -2545,9 +2550,12 @@ func TestAutoGenTempAddrRegenJobUpdates(t *testing.T) {
// as paased.
ndpConfigs.MaxTempAddrValidLifetime = 100 * time.Second
ndpConfigs.MaxTempAddrPreferredLifetime = 100 * time.Second
- if err := s.SetNDPConfigurations(nicID, ndpConfigs); err != nil {
- t.Fatalf("s.SetNDPConfigurations(%d, _): %s", nicID, err)
+ ipv6Ep, err := s.GetNetworkEndpoint(nicID, header.IPv6ProtocolNumber)
+ if err != nil {
+ t.Fatalf("s.GetNetworkEndpoint(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
}
+ ndpEP := ipv6Ep.(ipv6.NDPEndpoint)
+ ndpEP.SetNDPConfigurations(ndpConfigs)
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix, true, true, 100, 100))
select {
case e := <-ndpDisp.autoGenAddrC:
@@ -2565,9 +2573,7 @@ func TestAutoGenTempAddrRegenJobUpdates(t *testing.T) {
newLifetimes := newMinVLDuration + regenAfter + defaultAsyncNegativeEventTimeout
ndpConfigs.MaxTempAddrValidLifetime = newLifetimes
ndpConfigs.MaxTempAddrPreferredLifetime = newLifetimes
- if err := s.SetNDPConfigurations(nicID, ndpConfigs); err != nil {
- t.Fatalf("s.SetNDPConfigurations(%d, _): %s", nicID, err)
- }
+ ndpEP.SetNDPConfigurations(ndpConfigs)
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix, true, true, 100, 100))
expectAutoGenAddrEventAsync(tempAddr3, newAddr, regenAfter+defaultAsyncPositiveEventTimeout)
}
@@ -2655,20 +2661,21 @@ func TestMixedSLAACAddrConflictRegen(t *testing.T) {
autoGenAddrC: make(chan ndpAutoGenAddrEvent, 2),
}
e := channel.New(0, 1280, linkAddr1)
- ndpConfigs := stack.NDPConfigurations{
+ ndpConfigs := ipv6.NDPConfigurations{
HandleRAs: true,
AutoGenGlobalAddresses: true,
AutoGenTempGlobalAddresses: test.tempAddrs,
AutoGenAddressConflictRetries: 1,
}
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
- NDPConfigs: ndpConfigs,
- NDPDisp: &ndpDisp,
- OpaqueIIDOpts: stack.OpaqueInterfaceIdentifierOptions{
- NICNameFromID: test.nicNameFromID,
- },
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ndpConfigs,
+ NDPDisp: &ndpDisp,
+ OpaqueIIDOpts: ipv6.OpaqueInterfaceIdentifierOptions{
+ NICNameFromID: test.nicNameFromID,
+ },
+ })},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
s.SetRouteTable([]tcpip.Route{{
@@ -2739,8 +2746,11 @@ func TestMixedSLAACAddrConflictRegen(t *testing.T) {
ndpDisp.dadC = make(chan ndpDADEvent, 2)
ndpConfigs.DupAddrDetectTransmits = dupAddrTransmits
ndpConfigs.RetransmitTimer = retransmitTimer
- if err := s.SetNDPConfigurations(nicID, ndpConfigs); err != nil {
- t.Fatalf("s.SetNDPConfigurations(%d, _): %s", nicID, err)
+ if ipv6Ep, err := s.GetNetworkEndpoint(nicID, header.IPv6ProtocolNumber); err != nil {
+ t.Fatalf("s.GetNetworkEndpoint(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
+ } else {
+ ndpEP := ipv6Ep.(ipv6.NDPEndpoint)
+ ndpEP.SetNDPConfigurations(ndpConfigs)
}
// Do SLAAC for prefix.
@@ -2754,9 +2764,7 @@ func TestMixedSLAACAddrConflictRegen(t *testing.T) {
// DAD failure to restart the local generation process.
addr := test.addrs[maxSLAACAddrLocalRegenAttempts-1]
expectAutoGenAddrAsyncEvent(addr, newAddr)
- if err := s.DupTentativeAddrDetected(nicID, addr.Address); err != nil {
- t.Fatalf("s.DupTentativeAddrDetected(%d, %s): %s", nicID, addr.Address, err)
- }
+ rxNDPSolicit(e, addr.Address)
select {
case e := <-ndpDisp.dadC:
if diff := checkDADEvent(e, nicID, addr.Address, false, nil); diff != "" {
@@ -2794,14 +2802,15 @@ func stackAndNdpDispatcherWithDefaultRoute(t *testing.T, nicID tcpip.NICID, useN
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- },
- NDPDisp: ndpDisp,
- UseNeighborCache: useNeighborCache,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
+ },
+ NDPDisp: ndpDisp,
+ })},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
+ UseNeighborCache: useNeighborCache,
})
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -3036,11 +3045,11 @@ func TestAutoGenAddrJobDeprecation(t *testing.T) {
for _, stackTyp := range stacks {
t.Run(stackTyp.name, func(t *testing.T) {
- saved := stack.MinPrefixInformationValidLifetimeForUpdate
+ saved := ipv6.MinPrefixInformationValidLifetimeForUpdate
defer func() {
- stack.MinPrefixInformationValidLifetimeForUpdate = saved
+ ipv6.MinPrefixInformationValidLifetimeForUpdate = saved
}()
- stack.MinPrefixInformationValidLifetimeForUpdate = newMinVLDuration
+ ipv6.MinPrefixInformationValidLifetimeForUpdate = newMinVLDuration
prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1)
prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1)
@@ -3258,12 +3267,12 @@ func TestAutoGenAddrFiniteToInfiniteToFiniteVL(t *testing.T) {
const infiniteVLSeconds = 2
const minVLSeconds = 1
savedIL := header.NDPInfiniteLifetime
- savedMinVL := stack.MinPrefixInformationValidLifetimeForUpdate
+ savedMinVL := ipv6.MinPrefixInformationValidLifetimeForUpdate
defer func() {
- stack.MinPrefixInformationValidLifetimeForUpdate = savedMinVL
+ ipv6.MinPrefixInformationValidLifetimeForUpdate = savedMinVL
header.NDPInfiniteLifetime = savedIL
}()
- stack.MinPrefixInformationValidLifetimeForUpdate = minVLSeconds * time.Second
+ ipv6.MinPrefixInformationValidLifetimeForUpdate = minVLSeconds * time.Second
header.NDPInfiniteLifetime = infiniteVLSeconds * time.Second
prefix, _, addr := prefixSubnetAddr(0, linkAddr1)
@@ -3307,12 +3316,13 @@ func TestAutoGenAddrFiniteToInfiniteToFiniteVL(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
@@ -3357,11 +3367,11 @@ func TestAutoGenAddrFiniteToInfiniteToFiniteVL(t *testing.T) {
func TestAutoGenAddrValidLifetimeUpdates(t *testing.T) {
const infiniteVL = 4294967295
const newMinVL = 4
- saved := stack.MinPrefixInformationValidLifetimeForUpdate
+ saved := ipv6.MinPrefixInformationValidLifetimeForUpdate
defer func() {
- stack.MinPrefixInformationValidLifetimeForUpdate = saved
+ ipv6.MinPrefixInformationValidLifetimeForUpdate = saved
}()
- stack.MinPrefixInformationValidLifetimeForUpdate = newMinVL * time.Second
+ ipv6.MinPrefixInformationValidLifetimeForUpdate = newMinVL * time.Second
prefix, _, addr := prefixSubnetAddr(0, linkAddr1)
@@ -3449,12 +3459,13 @@ func TestAutoGenAddrValidLifetimeUpdates(t *testing.T) {
}
e := channel.New(10, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
@@ -3515,12 +3526,13 @@ func TestAutoGenAddrRemoval(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
@@ -3700,12 +3712,13 @@ func TestAutoGenAddrStaticConflict(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
@@ -3781,18 +3794,19 @@ func TestAutoGenAddrWithOpaqueIID(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- },
- NDPDisp: &ndpDisp,
- OpaqueIIDOpts: stack.OpaqueInterfaceIdentifierOptions{
- NICNameFromID: func(_ tcpip.NICID, nicName string) string {
- return nicName
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
},
- SecretKey: secretKey,
- },
+ NDPDisp: &ndpDisp,
+ OpaqueIIDOpts: ipv6.OpaqueInterfaceIdentifierOptions{
+ NICNameFromID: func(_ tcpip.NICID, nicName string) string {
+ return nicName
+ },
+ SecretKey: secretKey,
+ },
+ })},
})
opts := stack.NICOptions{Name: nicName}
if err := s.CreateNICWithOptions(nicID, e, opts); err != nil {
@@ -3856,11 +3870,11 @@ func TestAutoGenAddrInResponseToDADConflicts(t *testing.T) {
const lifetimeSeconds = 10
// Needed for the temporary address sub test.
- savedMaxDesync := stack.MaxDesyncFactor
+ savedMaxDesync := ipv6.MaxDesyncFactor
defer func() {
- stack.MaxDesyncFactor = savedMaxDesync
+ ipv6.MaxDesyncFactor = savedMaxDesync
}()
- stack.MaxDesyncFactor = time.Nanosecond
+ ipv6.MaxDesyncFactor = time.Nanosecond
var secretKeyBuf [header.OpaqueIIDSecretKeyMinBytes]byte
secretKey := secretKeyBuf[:]
@@ -3938,14 +3952,14 @@ func TestAutoGenAddrInResponseToDADConflicts(t *testing.T) {
addrTypes := []struct {
name string
- ndpConfigs stack.NDPConfigurations
+ ndpConfigs ipv6.NDPConfigurations
autoGenLinkLocal bool
prepareFn func(t *testing.T, ndpDisp *ndpDispatcher, e *channel.Endpoint, tempIIDHistory []byte) []tcpip.AddressWithPrefix
addrGenFn func(dadCounter uint8, tempIIDHistory []byte) tcpip.AddressWithPrefix
}{
{
name: "Global address",
- ndpConfigs: stack.NDPConfigurations{
+ ndpConfigs: ipv6.NDPConfigurations{
DupAddrDetectTransmits: dadTransmits,
RetransmitTimer: retransmitTimer,
HandleRAs: true,
@@ -3963,7 +3977,7 @@ func TestAutoGenAddrInResponseToDADConflicts(t *testing.T) {
},
{
name: "LinkLocal address",
- ndpConfigs: stack.NDPConfigurations{
+ ndpConfigs: ipv6.NDPConfigurations{
DupAddrDetectTransmits: dadTransmits,
RetransmitTimer: retransmitTimer,
},
@@ -3977,7 +3991,7 @@ func TestAutoGenAddrInResponseToDADConflicts(t *testing.T) {
},
{
name: "Temporary address",
- ndpConfigs: stack.NDPConfigurations{
+ ndpConfigs: ipv6.NDPConfigurations{
DupAddrDetectTransmits: dadTransmits,
RetransmitTimer: retransmitTimer,
HandleRAs: true,
@@ -4029,16 +4043,17 @@ func TestAutoGenAddrInResponseToDADConflicts(t *testing.T) {
ndpConfigs := addrType.ndpConfigs
ndpConfigs.AutoGenAddressConflictRetries = maxRetries
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- AutoGenIPv6LinkLocal: addrType.autoGenLinkLocal,
- NDPConfigs: ndpConfigs,
- NDPDisp: &ndpDisp,
- OpaqueIIDOpts: stack.OpaqueInterfaceIdentifierOptions{
- NICNameFromID: func(_ tcpip.NICID, nicName string) string {
- return nicName
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ AutoGenIPv6LinkLocal: addrType.autoGenLinkLocal,
+ NDPConfigs: ndpConfigs,
+ NDPDisp: &ndpDisp,
+ OpaqueIIDOpts: ipv6.OpaqueInterfaceIdentifierOptions{
+ NICNameFromID: func(_ tcpip.NICID, nicName string) string {
+ return nicName
+ },
+ SecretKey: secretKey,
},
- SecretKey: secretKey,
- },
+ })},
})
opts := stack.NICOptions{Name: nicName}
if err := s.CreateNICWithOptions(nicID, e, opts); err != nil {
@@ -4059,9 +4074,7 @@ func TestAutoGenAddrInResponseToDADConflicts(t *testing.T) {
}
// Simulate a DAD conflict.
- if err := s.DupTentativeAddrDetected(nicID, addr.Address); err != nil {
- t.Fatalf("s.DupTentativeAddrDetected(%d, %s): %s", nicID, addr.Address, err)
- }
+ rxNDPSolicit(e, addr.Address)
expectAutoGenAddrEvent(t, &ndpDisp, addr, invalidatedAddr)
expectDADEvent(t, &ndpDisp, addr.Address, false)
@@ -4119,14 +4132,14 @@ func TestAutoGenAddrWithEUI64IIDNoDADRetries(t *testing.T) {
addrTypes := []struct {
name string
- ndpConfigs stack.NDPConfigurations
+ ndpConfigs ipv6.NDPConfigurations
autoGenLinkLocal bool
subnet tcpip.Subnet
triggerSLAACFn func(e *channel.Endpoint)
}{
{
name: "Global address",
- ndpConfigs: stack.NDPConfigurations{
+ ndpConfigs: ipv6.NDPConfigurations{
DupAddrDetectTransmits: dadTransmits,
RetransmitTimer: retransmitTimer,
HandleRAs: true,
@@ -4142,7 +4155,7 @@ func TestAutoGenAddrWithEUI64IIDNoDADRetries(t *testing.T) {
},
{
name: "LinkLocal address",
- ndpConfigs: stack.NDPConfigurations{
+ ndpConfigs: ipv6.NDPConfigurations{
DupAddrDetectTransmits: dadTransmits,
RetransmitTimer: retransmitTimer,
AutoGenAddressConflictRetries: maxRetries,
@@ -4165,10 +4178,11 @@ func TestAutoGenAddrWithEUI64IIDNoDADRetries(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- AutoGenIPv6LinkLocal: addrType.autoGenLinkLocal,
- NDPConfigs: addrType.ndpConfigs,
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ AutoGenIPv6LinkLocal: addrType.autoGenLinkLocal,
+ NDPConfigs: addrType.ndpConfigs,
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -4198,9 +4212,7 @@ func TestAutoGenAddrWithEUI64IIDNoDADRetries(t *testing.T) {
expectAutoGenAddrEvent(addr, newAddr)
// Simulate a DAD conflict.
- if err := s.DupTentativeAddrDetected(nicID, addr.Address); err != nil {
- t.Fatalf("s.DupTentativeAddrDetected(%d, %s): %s", nicID, addr.Address, err)
- }
+ rxNDPSolicit(e, addr.Address)
expectAutoGenAddrEvent(addr, invalidatedAddr)
select {
case e := <-ndpDisp.dadC:
@@ -4250,21 +4262,22 @@ func TestAutoGenAddrContinuesLifetimesAfterRetry(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- DupAddrDetectTransmits: dadTransmits,
- RetransmitTimer: retransmitTimer,
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- AutoGenAddressConflictRetries: maxRetries,
- },
- NDPDisp: &ndpDisp,
- OpaqueIIDOpts: stack.OpaqueInterfaceIdentifierOptions{
- NICNameFromID: func(_ tcpip.NICID, nicName string) string {
- return nicName
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ DupAddrDetectTransmits: dadTransmits,
+ RetransmitTimer: retransmitTimer,
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
+ AutoGenAddressConflictRetries: maxRetries,
},
- SecretKey: secretKey,
- },
+ NDPDisp: &ndpDisp,
+ OpaqueIIDOpts: ipv6.OpaqueInterfaceIdentifierOptions{
+ NICNameFromID: func(_ tcpip.NICID, nicName string) string {
+ return nicName
+ },
+ SecretKey: secretKey,
+ },
+ })},
})
opts := stack.NICOptions{Name: nicName}
if err := s.CreateNICWithOptions(nicID, e, opts); err != nil {
@@ -4296,9 +4309,7 @@ func TestAutoGenAddrContinuesLifetimesAfterRetry(t *testing.T) {
// Simulate a DAD conflict after some time has passed.
time.Sleep(failureTimer)
- if err := s.DupTentativeAddrDetected(nicID, addr.Address); err != nil {
- t.Fatalf("s.DupTentativeAddrDetected(%d, %s): %s", nicID, addr.Address, err)
- }
+ rxNDPSolicit(e, addr.Address)
expectAutoGenAddrEvent(addr, invalidatedAddr)
select {
case e := <-ndpDisp.dadC:
@@ -4459,11 +4470,12 @@ func TestNDPRecursiveDNSServerDispatch(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(1, e); err != nil {
t.Fatalf("CreateNIC(1) = %s", err)
@@ -4509,11 +4521,12 @@ func TestNDPDNSSearchListDispatch(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -4694,15 +4707,16 @@ func TestCleanupNDPState(t *testing.T) {
autoGenAddrC: make(chan ndpAutoGenAddrEvent, test.maxAutoGenAddrEvents),
}
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- AutoGenIPv6LinkLocal: true,
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- DiscoverDefaultRouters: true,
- DiscoverOnLinkPrefixes: true,
- AutoGenGlobalAddresses: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ AutoGenIPv6LinkLocal: true,
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ DiscoverDefaultRouters: true,
+ DiscoverOnLinkPrefixes: true,
+ AutoGenGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
expectRouterEvent := func() (bool, ndpRouterEvent) {
@@ -4967,18 +4981,19 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) {
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ },
+ NDPDisp: &ndpDisp,
+ })},
})
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
- expectDHCPv6Event := func(configuration stack.DHCPv6ConfigurationFromNDPRA) {
+ expectDHCPv6Event := func(configuration ipv6.DHCPv6ConfigurationFromNDPRA) {
t.Helper()
select {
case e := <-ndpDisp.dhcpv6ConfigurationC:
@@ -5002,7 +5017,7 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) {
// Even if the first RA reports no DHCPv6 configurations are available, the
// dispatcher should get an event.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, false))
- expectDHCPv6Event(stack.DHCPv6NoConfiguration)
+ expectDHCPv6Event(ipv6.DHCPv6NoConfiguration)
// Receiving the same update again should not result in an event to the
// dispatcher.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, false))
@@ -5011,19 +5026,19 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) {
// Receive an RA that updates the DHCPv6 configuration to Other
// Configurations.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
- expectDHCPv6Event(stack.DHCPv6OtherConfigurations)
+ expectDHCPv6Event(ipv6.DHCPv6OtherConfigurations)
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
expectNoDHCPv6Event()
// Receive an RA that updates the DHCPv6 configuration to Managed Address.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, true, false))
- expectDHCPv6Event(stack.DHCPv6ManagedAddress)
+ expectDHCPv6Event(ipv6.DHCPv6ManagedAddress)
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, true, false))
expectNoDHCPv6Event()
// Receive an RA that updates the DHCPv6 configuration to none.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, false))
- expectDHCPv6Event(stack.DHCPv6NoConfiguration)
+ expectDHCPv6Event(ipv6.DHCPv6NoConfiguration)
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, false))
expectNoDHCPv6Event()
@@ -5031,7 +5046,7 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) {
//
// Note, when the M flag is set, the O flag is redundant.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, true, true))
- expectDHCPv6Event(stack.DHCPv6ManagedAddress)
+ expectDHCPv6Event(ipv6.DHCPv6ManagedAddress)
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, true, true))
expectNoDHCPv6Event()
// Even though the DHCPv6 flags are different, the effective configuration is
@@ -5044,7 +5059,7 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) {
// Receive an RA that updates the DHCPv6 configuration to Other
// Configurations.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
- expectDHCPv6Event(stack.DHCPv6OtherConfigurations)
+ expectDHCPv6Event(ipv6.DHCPv6OtherConfigurations)
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
expectNoDHCPv6Event()
@@ -5059,7 +5074,7 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) {
// Receive an RA that updates the DHCPv6 configuration to Other
// Configurations.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
- expectDHCPv6Event(stack.DHCPv6OtherConfigurations)
+ expectDHCPv6Event(ipv6.DHCPv6OtherConfigurations)
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
expectNoDHCPv6Event()
}
@@ -5217,12 +5232,13 @@ func TestRouterSolicitation(t *testing.T) {
}
}
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- MaxRtrSolicitations: test.maxRtrSolicit,
- RtrSolicitationInterval: test.rtrSolicitInt,
- MaxRtrSolicitationDelay: test.maxRtrSolicitDelay,
- },
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ MaxRtrSolicitations: test.maxRtrSolicit,
+ RtrSolicitationInterval: test.rtrSolicitInt,
+ MaxRtrSolicitationDelay: test.maxRtrSolicitDelay,
+ },
+ })},
})
if err := s.CreateNIC(nicID, &e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -5357,12 +5373,13 @@ func TestStopStartSolicitingRouters(t *testing.T) {
checker.NDPRS())
}
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- MaxRtrSolicitations: maxRtrSolicitations,
- RtrSolicitationInterval: interval,
- MaxRtrSolicitationDelay: delay,
- },
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ MaxRtrSolicitations: maxRtrSolicitations,
+ RtrSolicitationInterval: interval,
+ MaxRtrSolicitationDelay: delay,
+ },
+ })},
})
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
diff --git a/pkg/tcpip/stack/neighbor_entry.go b/pkg/tcpip/stack/neighbor_entry.go
index 213646160..9a72bec79 100644
--- a/pkg/tcpip/stack/neighbor_entry.go
+++ b/pkg/tcpip/stack/neighbor_entry.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/sleep"
"gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
// NeighborEntry describes a neighboring device in the local network.
@@ -439,7 +440,7 @@ func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, fla
e.notifyWakersLocked()
}
- if e.isRouter && !flags.IsRouter {
+ if e.isRouter && !flags.IsRouter && header.IsV6UnicastAddress(e.neigh.Addr) {
// "In those cases where the IsRouter flag changes from TRUE to FALSE as
// a result of this update, the node MUST remove that router from the
// Default Router List and update the Destination Cache entries for all
@@ -447,9 +448,17 @@ func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, fla
// 7.3.3. This is needed to detect when a node that is used as a router
// stops forwarding packets due to being configured as a host."
// - RFC 4861 section 7.2.5
- e.nic.mu.Lock()
- e.nic.mu.ndp.invalidateDefaultRouter(e.neigh.Addr)
- e.nic.mu.Unlock()
+ //
+ // TODO(gvisor.dev/issue/4085): Remove the special casing we do for IPv6
+ // here.
+ ep, ok := e.nic.networkEndpoints[header.IPv6ProtocolNumber]
+ if !ok {
+ panic(fmt.Sprintf("have a neighbor entry for an IPv6 router but no IPv6 network endpoint"))
+ }
+
+ if ndpEP, ok := ep.(NDPEndpoint); ok {
+ ndpEP.InvalidateDefaultRouter(e.neigh.Addr)
+ }
}
e.isRouter = flags.IsRouter
diff --git a/pkg/tcpip/stack/neighbor_entry_test.go b/pkg/tcpip/stack/neighbor_entry_test.go
index e530ec7ea..a265fff0a 100644
--- a/pkg/tcpip/stack/neighbor_entry_test.go
+++ b/pkg/tcpip/stack/neighbor_entry_test.go
@@ -28,6 +28,7 @@ import (
"gvisor.dev/gvisor/pkg/sleep"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/faketime"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
const (
@@ -233,18 +234,15 @@ func entryTestSetup(c NUDConfigurations) (*neighborEntry, *testNUDDispatcher, *e
nudDisp: &disp,
},
}
+ nic.networkEndpoints = map[tcpip.NetworkProtocolNumber]NetworkEndpoint{
+ header.IPv6ProtocolNumber: (&testIPv6Protocol{}).NewEndpoint(&nic, nil, nil, nil),
+ }
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
nudState := NewNUDState(c, rng)
linkRes := entryTestLinkResolver{}
entry := newNeighborEntry(&nic, entryTestAddr1 /* remoteAddr */, entryTestAddr2 /* localAddr */, nudState, &linkRes)
- // Stub out ndpState to verify modification of default routers.
- nic.mu.ndp = ndpState{
- nic: &nic,
- defaultRouters: make(map[tcpip.Address]defaultRouterState),
- }
-
// Stub out the neighbor cache to verify deletion from the cache.
nic.neigh = &neighborCache{
nic: &nic,
@@ -817,6 +815,8 @@ func TestEntryStaysReachableWhenConfirmationWithRouterFlag(t *testing.T) {
c := DefaultNUDConfigurations()
e, nudDisp, linkRes, _ := entryTestSetup(c)
+ ipv6EP := e.nic.networkEndpoints[header.IPv6ProtocolNumber].(*testIPv6Endpoint)
+
e.mu.Lock()
e.handlePacketQueuedLocked()
e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
@@ -830,9 +830,7 @@ func TestEntryStaysReachableWhenConfirmationWithRouterFlag(t *testing.T) {
if got, want := e.isRouter, true; got != want {
t.Errorf("got e.isRouter = %t, want = %t", got, want)
}
- e.nic.mu.ndp.defaultRouters[entryTestAddr1] = defaultRouterState{
- invalidationJob: e.nic.stack.newJob(&testLocker{}, func() {}),
- }
+
e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
Solicited: false,
Override: false,
@@ -841,8 +839,8 @@ func TestEntryStaysReachableWhenConfirmationWithRouterFlag(t *testing.T) {
if got, want := e.isRouter, false; got != want {
t.Errorf("got e.isRouter = %t, want = %t", got, want)
}
- if _, ok := e.nic.mu.ndp.defaultRouters[entryTestAddr1]; ok {
- t.Errorf("unexpected defaultRouter for %s", entryTestAddr1)
+ if ipv6EP.invalidatedRtr != e.neigh.Addr {
+ t.Errorf("got ipv6EP.invalidatedRtr = %s, want = %s", ipv6EP.invalidatedRtr, e.neigh.Addr)
}
e.mu.Unlock()
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 06d70dd1c..212c6edae 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -18,7 +18,6 @@ import (
"fmt"
"math/rand"
"reflect"
- "sort"
"sync/atomic"
"gvisor.dev/gvisor/pkg/sleep"
@@ -28,13 +27,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/header"
)
-var ipv4BroadcastAddr = tcpip.ProtocolAddress{
- Protocol: header.IPv4ProtocolNumber,
- AddressWithPrefix: tcpip.AddressWithPrefix{
- Address: header.IPv4Broadcast,
- PrefixLen: 8 * header.IPv4AddressSize,
- },
-}
+var _ NetworkInterface = (*NIC)(nil)
// NIC represents a "network interface card" to which the networking stack is
// attached.
@@ -49,18 +42,18 @@ type NIC struct {
neigh *neighborCache
networkEndpoints map[tcpip.NetworkProtocolNumber]NetworkEndpoint
+ // enabled is set to 1 when the NIC is enabled and 0 when it is disabled.
+ //
+ // Must be accessed using atomic operations.
+ enabled uint32
+
mu struct {
sync.RWMutex
- enabled bool
spoofing bool
promiscuous bool
- primary map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint
- endpoints map[NetworkEndpointID]*referencedNetworkEndpoint
- mcastJoins map[NetworkEndpointID]uint32
// packetEPs is protected by mu, but the contained PacketEndpoint
// values are not.
packetEPs map[tcpip.NetworkProtocolNumber][]PacketEndpoint
- ndp ndpState
}
}
@@ -84,25 +77,6 @@ type DirectionStats struct {
Bytes *tcpip.StatCounter
}
-// PrimaryEndpointBehavior is an enumeration of an endpoint's primacy behavior.
-type PrimaryEndpointBehavior int
-
-const (
- // CanBePrimaryEndpoint indicates the endpoint can be used as a primary
- // endpoint for new connections with no local address. This is the
- // default when calling NIC.AddAddress.
- CanBePrimaryEndpoint PrimaryEndpointBehavior = iota
-
- // FirstPrimaryEndpoint indicates the endpoint should be the first
- // primary endpoint considered. If there are multiple endpoints with
- // this behavior, the most recently-added one will be first.
- FirstPrimaryEndpoint
-
- // NeverPrimaryEndpoint indicates the endpoint should never be a
- // primary endpoint.
- NeverPrimaryEndpoint
-)
-
// newNIC returns a new NIC using the default NDP configurations from stack.
func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICContext) *NIC {
// TODO(b/141011931): Validate a LinkEndpoint (ep) is valid. For
@@ -122,19 +96,7 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
stats: makeNICStats(),
networkEndpoints: make(map[tcpip.NetworkProtocolNumber]NetworkEndpoint),
}
- nic.mu.primary = make(map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint)
- nic.mu.endpoints = make(map[NetworkEndpointID]*referencedNetworkEndpoint)
- nic.mu.mcastJoins = make(map[NetworkEndpointID]uint32)
nic.mu.packetEPs = make(map[tcpip.NetworkProtocolNumber][]PacketEndpoint)
- nic.mu.ndp = ndpState{
- nic: nic,
- configs: stack.ndpConfigs,
- dad: make(map[tcpip.Address]dadState),
- defaultRouters: make(map[tcpip.Address]defaultRouterState),
- onLinkPrefixes: make(map[tcpip.Subnet]onLinkPrefixState),
- slaacPrefixes: make(map[tcpip.Subnet]slaacPrefixState),
- }
- nic.mu.ndp.initializeTempAddrState()
// Check for Neighbor Unreachability Detection support.
var nud NUDHandler
@@ -162,7 +124,7 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
for _, netProto := range stack.networkProtocols {
netNum := netProto.Number()
nic.mu.packetEPs[netNum] = nil
- nic.networkEndpoints[netNum] = netProto.NewEndpoint(id, stack, nud, nic, ep, stack)
+ nic.networkEndpoints[netNum] = netProto.NewEndpoint(nic, stack, nud, nic)
}
nic.linkEP.Attach(nic)
@@ -170,29 +132,28 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
return nic
}
-// enabled returns true if n is enabled.
-func (n *NIC) enabled() bool {
- n.mu.RLock()
- enabled := n.mu.enabled
- n.mu.RUnlock()
- return enabled
+// Enabled implements NetworkInterface.
+func (n *NIC) Enabled() bool {
+ return atomic.LoadUint32(&n.enabled) == 1
}
-// disable disables n.
+// setEnabled sets the enabled status for the NIC.
//
-// It undoes the work done by enable.
-func (n *NIC) disable() *tcpip.Error {
- n.mu.RLock()
- enabled := n.mu.enabled
- n.mu.RUnlock()
- if !enabled {
- return nil
+// Returns true if the enabled status was updated.
+func (n *NIC) setEnabled(v bool) bool {
+ if v {
+ return atomic.SwapUint32(&n.enabled, 1) == 0
}
+ return atomic.SwapUint32(&n.enabled, 0) == 1
+}
+// disable disables n.
+//
+// It undoes the work done by enable.
+func (n *NIC) disable() {
n.mu.Lock()
- err := n.disableLocked()
+ n.disableLocked()
n.mu.Unlock()
- return err
}
// disableLocked disables n.
@@ -200,9 +161,9 @@ func (n *NIC) disable() *tcpip.Error {
// It undoes the work done by enable.
//
// n MUST be locked.
-func (n *NIC) disableLocked() *tcpip.Error {
- if !n.mu.enabled {
- return nil
+func (n *NIC) disableLocked() {
+ if !n.setEnabled(false) {
+ return
}
// TODO(gvisor.dev/issue/1491): Should Routes that are currently bound to n be
@@ -210,38 +171,9 @@ func (n *NIC) disableLocked() *tcpip.Error {
// again, and applications may not know that the underlying NIC was ever
// disabled.
- if _, ok := n.stack.networkProtocols[header.IPv6ProtocolNumber]; ok {
- n.mu.ndp.stopSolicitingRouters()
- n.mu.ndp.cleanupState(false /* hostOnly */)
-
- // Stop DAD for all the unicast IPv6 endpoints that are in the
- // permanentTentative state.
- for _, r := range n.mu.endpoints {
- if addr := r.address(); r.getKind() == permanentTentative && header.IsV6UnicastAddress(addr) {
- n.mu.ndp.stopDuplicateAddressDetection(addr)
- }
- }
-
- // The NIC may have already left the multicast group.
- if err := n.leaveGroupLocked(header.IPv6AllNodesMulticastAddress, false /* force */); err != nil && err != tcpip.ErrBadLocalAddress {
- return err
- }
- }
-
- if _, ok := n.stack.networkProtocols[header.IPv4ProtocolNumber]; ok {
- // The NIC may have already left the multicast group.
- if err := n.leaveGroupLocked(header.IPv4AllSystems, false /* force */); err != nil && err != tcpip.ErrBadLocalAddress {
- return err
- }
-
- // The address may have already been removed.
- if err := n.removePermanentAddressLocked(ipv4BroadcastAddr.AddressWithPrefix.Address); err != nil && err != tcpip.ErrBadLocalAddress {
- return err
- }
+ for _, ep := range n.networkEndpoints {
+ ep.Disable()
}
-
- n.mu.enabled = false
- return nil
}
// enable enables n.
@@ -251,162 +183,39 @@ func (n *NIC) disableLocked() *tcpip.Error {
// routers if the stack is not operating as a router. If the stack is also
// configured to auto-generate a link-local address, one will be generated.
func (n *NIC) enable() *tcpip.Error {
- n.mu.RLock()
- enabled := n.mu.enabled
- n.mu.RUnlock()
- if enabled {
- return nil
- }
-
n.mu.Lock()
defer n.mu.Unlock()
- if n.mu.enabled {
+ if !n.setEnabled(true) {
return nil
}
- n.mu.enabled = true
-
- // Create an endpoint to receive broadcast packets on this interface.
- if _, ok := n.stack.networkProtocols[header.IPv4ProtocolNumber]; ok {
- if _, err := n.addAddressLocked(ipv4BroadcastAddr, NeverPrimaryEndpoint, permanent, static, false /* deprecated */); err != nil {
- return err
- }
-
- // As per RFC 1122 section 3.3.7, all hosts should join the all-hosts
- // multicast group. Note, the IANA calls the all-hosts multicast group the
- // all-systems multicast group.
- if err := n.joinGroupLocked(header.IPv4ProtocolNumber, header.IPv4AllSystems); err != nil {
- return err
- }
- }
-
- // Join the IPv6 All-Nodes Multicast group if the stack is configured to
- // use IPv6. This is required to ensure that this node properly receives
- // and responds to the various NDP messages that are destined to the
- // all-nodes multicast address. An example is the Neighbor Advertisement
- // when we perform Duplicate Address Detection, or Router Advertisement
- // when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861
- // section 4.2 for more information.
- //
- // Also auto-generate an IPv6 link-local address based on the NIC's
- // link address if it is configured to do so. Note, each interface is
- // required to have IPv6 link-local unicast address, as per RFC 4291
- // section 2.1.
- _, ok := n.stack.networkProtocols[header.IPv6ProtocolNumber]
- if !ok {
- return nil
- }
-
- // Join the All-Nodes multicast group before starting DAD as responses to DAD
- // (NDP NS) messages may be sent to the All-Nodes multicast group if the
- // source address of the NDP NS is the unspecified address, as per RFC 4861
- // section 7.2.4.
- if err := n.joinGroupLocked(header.IPv6ProtocolNumber, header.IPv6AllNodesMulticastAddress); err != nil {
- return err
- }
-
- // Perform DAD on the all the unicast IPv6 endpoints that are in the permanent
- // state.
- //
- // Addresses may have aleady completed DAD but in the time since the NIC was
- // last enabled, other devices may have acquired the same addresses.
- for _, r := range n.mu.endpoints {
- addr := r.address()
- if k := r.getKind(); (k != permanent && k != permanentTentative) || !header.IsV6UnicastAddress(addr) {
- continue
- }
-
- r.setKind(permanentTentative)
- if err := n.mu.ndp.startDuplicateAddressDetection(addr, r); err != nil {
+ for _, ep := range n.networkEndpoints {
+ if err := ep.Enable(); err != nil {
return err
}
}
- // Do not auto-generate an IPv6 link-local address for loopback devices.
- if n.stack.autoGenIPv6LinkLocal && !n.isLoopback() {
- // The valid and preferred lifetime is infinite for the auto-generated
- // link-local address.
- n.mu.ndp.doSLAAC(header.IPv6LinkLocalPrefix.Subnet(), header.NDPInfiniteLifetime, header.NDPInfiniteLifetime)
- }
-
- // If we are operating as a router, then do not solicit routers since we
- // won't process the RAs anyways.
- //
- // Routers do not process Router Advertisements (RA) the same way a host
- // does. That is, routers do not learn from RAs (e.g. on-link prefixes
- // and default routers). Therefore, soliciting RAs from other routers on
- // a link is unnecessary for routers.
- if !n.stack.Forwarding(header.IPv6ProtocolNumber) {
- n.mu.ndp.startSolicitingRouters()
- }
-
return nil
}
-// remove detaches NIC from the link endpoint, and marks existing referenced
-// network endpoints expired. This guarantees no packets between this NIC and
-// the network stack.
+// remove detaches NIC from the link endpoint and releases network endpoint
+// resources. This guarantees no packets between this NIC and the network
+// stack.
func (n *NIC) remove() *tcpip.Error {
n.mu.Lock()
defer n.mu.Unlock()
n.disableLocked()
- // TODO(b/151378115): come up with a better way to pick an error than the
- // first one.
- var err *tcpip.Error
-
- // Forcefully leave multicast groups.
- for nid := range n.mu.mcastJoins {
- if tempErr := n.leaveGroupLocked(nid.LocalAddress, true /* force */); tempErr != nil && err == nil {
- err = tempErr
- }
- }
-
- // Remove permanent and permanentTentative addresses, so no packet goes out.
- for nid, ref := range n.mu.endpoints {
- switch ref.getKind() {
- case permanentTentative, permanent:
- if tempErr := n.removePermanentAddressLocked(nid.LocalAddress); tempErr != nil && err == nil {
- err = tempErr
- }
- }
- }
-
- // Release any resources the network endpoint may hold.
for _, ep := range n.networkEndpoints {
ep.Close()
}
+ n.networkEndpoints = nil
// Detach from link endpoint, so no packet comes in.
n.linkEP.Attach(nil)
-
- return err
-}
-
-// becomeIPv6Router transitions n into an IPv6 router.
-//
-// When transitioning into an IPv6 router, host-only state (NDP discovered
-// routers, discovered on-link prefixes, and auto-generated addresses) will
-// be cleaned up/invalidated and NDP router solicitations will be stopped.
-func (n *NIC) becomeIPv6Router() {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- n.mu.ndp.cleanupState(true /* hostOnly */)
- n.mu.ndp.stopSolicitingRouters()
-}
-
-// becomeIPv6Host transitions n into an IPv6 host.
-//
-// When transitioning into an IPv6 host, NDP router solicitations will be
-// started.
-func (n *NIC) becomeIPv6Host() {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- n.mu.ndp.startSolicitingRouters()
+ return nil
}
// setPromiscuousMode enables or disables promiscuous mode.
@@ -423,7 +232,8 @@ func (n *NIC) isPromiscuousMode() bool {
return rv
}
-func (n *NIC) isLoopback() bool {
+// IsLoopback implements NetworkInterface.
+func (n *NIC) IsLoopback() bool {
return n.linkEP.Capabilities()&CapabilityLoopback != 0
}
@@ -440,200 +250,41 @@ func (n *NIC) setSpoofing(enable bool) {
//
// If an IPv6 primary endpoint is requested, Source Address Selection (as
// defined by RFC 6724 section 5) will be performed.
-func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber, remoteAddr tcpip.Address) *referencedNetworkEndpoint {
- if protocol == header.IPv6ProtocolNumber && len(remoteAddr) != 0 {
- return n.primaryIPv6Endpoint(remoteAddr)
- }
-
+func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber, remoteAddr tcpip.Address) AssignableAddressEndpoint {
n.mu.RLock()
defer n.mu.RUnlock()
- var deprecatedEndpoint *referencedNetworkEndpoint
- for _, r := range n.mu.primary[protocol] {
- if !r.isValidForOutgoingRLocked() {
- continue
- }
-
- if !r.deprecated {
- if r.tryIncRef() {
- // r is not deprecated, so return it immediately.
- //
- // If we kept track of a deprecated endpoint, decrement its reference
- // count since it was incremented when we decided to keep track of it.
- if deprecatedEndpoint != nil {
- deprecatedEndpoint.decRefLocked()
- deprecatedEndpoint = nil
- }
-
- return r
- }
- } else if deprecatedEndpoint == nil && r.tryIncRef() {
- // We prefer an endpoint that is not deprecated, but we keep track of r in
- // case n doesn't have any non-deprecated endpoints.
- //
- // If we end up finding a more preferred endpoint, r's reference count
- // will be decremented when such an endpoint is found.
- deprecatedEndpoint = r
- }
- }
-
- // n doesn't have any valid non-deprecated endpoints, so return
- // deprecatedEndpoint (which may be nil if n doesn't have any valid deprecated
- // endpoints either).
- return deprecatedEndpoint
-}
-
-// ipv6AddrCandidate is an IPv6 candidate for Source Address Selection (RFC
-// 6724 section 5).
-type ipv6AddrCandidate struct {
- ref *referencedNetworkEndpoint
- scope header.IPv6AddressScope
-}
-
-// primaryIPv6Endpoint returns an IPv6 endpoint following Source Address
-// Selection (RFC 6724 section 5).
-//
-// Note, only rules 1-3 and 7 are followed.
-//
-// remoteAddr must be a valid IPv6 address.
-func (n *NIC) primaryIPv6Endpoint(remoteAddr tcpip.Address) *referencedNetworkEndpoint {
- n.mu.RLock()
- ref := n.primaryIPv6EndpointRLocked(remoteAddr)
- n.mu.RUnlock()
- return ref
-}
-
-// primaryIPv6EndpointLocked returns an IPv6 endpoint following Source Address
-// Selection (RFC 6724 section 5).
-//
-// Note, only rules 1-3 and 7 are followed.
-//
-// remoteAddr must be a valid IPv6 address.
-//
-// n.mu MUST be read locked.
-func (n *NIC) primaryIPv6EndpointRLocked(remoteAddr tcpip.Address) *referencedNetworkEndpoint {
- primaryAddrs := n.mu.primary[header.IPv6ProtocolNumber]
-
- if len(primaryAddrs) == 0 {
- return nil
- }
-
- // Create a candidate set of available addresses we can potentially use as a
- // source address.
- cs := make([]ipv6AddrCandidate, 0, len(primaryAddrs))
- for _, r := range primaryAddrs {
- // If r is not valid for outgoing connections, it is not a valid endpoint.
- if !r.isValidForOutgoingRLocked() {
- continue
- }
-
- addr := r.address()
- scope, err := header.ScopeForIPv6Address(addr)
- if err != nil {
- // Should never happen as we got r from the primary IPv6 endpoint list and
- // ScopeForIPv6Address only returns an error if addr is not an IPv6
- // address.
- panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", addr, err))
- }
-
- cs = append(cs, ipv6AddrCandidate{
- ref: r,
- scope: scope,
- })
- }
-
- remoteScope, err := header.ScopeForIPv6Address(remoteAddr)
- if err != nil {
- // primaryIPv6Endpoint should never be called with an invalid IPv6 address.
- panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", remoteAddr, err))
- }
-
- // Sort the addresses as per RFC 6724 section 5 rules 1-3.
- //
- // TODO(b/146021396): Implement rules 4-8 of RFC 6724 section 5.
- sort.Slice(cs, func(i, j int) bool {
- sa := cs[i]
- sb := cs[j]
-
- // Prefer same address as per RFC 6724 section 5 rule 1.
- if sa.ref.address() == remoteAddr {
- return true
- }
- if sb.ref.address() == remoteAddr {
- return false
- }
-
- // Prefer appropriate scope as per RFC 6724 section 5 rule 2.
- if sa.scope < sb.scope {
- return sa.scope >= remoteScope
- } else if sb.scope < sa.scope {
- return sb.scope < remoteScope
- }
-
- // Avoid deprecated addresses as per RFC 6724 section 5 rule 3.
- if saDep, sbDep := sa.ref.deprecated, sb.ref.deprecated; saDep != sbDep {
- // If sa is not deprecated, it is preferred over sb.
- return sbDep
- }
-
- // Prefer temporary addresses as per RFC 6724 section 5 rule 7.
- if saTemp, sbTemp := sa.ref.configType == slaacTemp, sb.ref.configType == slaacTemp; saTemp != sbTemp {
- return saTemp
- }
-
- // sa and sb are equal, return the endpoint that is closest to the front of
- // the primary endpoint list.
- return i < j
- })
-
- // Return the most preferred address that can have its reference count
- // incremented.
- for _, c := range cs {
- if r := c.ref; r.tryIncRef() {
- return r
- }
- }
-
- return nil
-}
-
-// hasPermanentAddrLocked returns true if n has a permanent (including currently
-// tentative) address, addr.
-func (n *NIC) hasPermanentAddrLocked(addr tcpip.Address) bool {
- ref, ok := n.mu.endpoints[NetworkEndpointID{addr}]
-
+ ep, ok := n.networkEndpoints[protocol]
if !ok {
- return false
+ return nil
}
- kind := ref.getKind()
-
- return kind == permanent || kind == permanentTentative
+ return ep.AcquirePrimaryAddress(remoteAddr, n.mu.spoofing)
}
-type getRefBehaviour int
+type getAddressBehaviour int
const (
// spoofing indicates that the NIC's spoofing flag should be observed when
- // getting a NIC's referenced network endpoint.
- spoofing getRefBehaviour = iota
+ // getting a NIC's address endpoint.
+ spoofing getAddressBehaviour = iota
// promiscuous indicates that the NIC's promiscuous flag should be observed
- // when getting a NIC's referenced network endpoint.
+ // when getting a NIC's address endpoint.
promiscuous
)
-func (n *NIC) getRef(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) *referencedNetworkEndpoint {
- return n.getRefOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, promiscuous)
+func (n *NIC) getAddress(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) AssignableAddressEndpoint {
+ return n.getAddressOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, promiscuous)
}
// findEndpoint finds the endpoint, if any, with the given address.
-func (n *NIC) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) *referencedNetworkEndpoint {
- return n.getRefOrCreateTemp(protocol, address, peb, spoofing)
+func (n *NIC) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) AssignableAddressEndpoint {
+ return n.getAddressOrCreateTemp(protocol, address, peb, spoofing)
}
-// getRefEpOrCreateTemp returns the referenced network endpoint for the given
-// protocol and address.
+// getAddressEpOrCreateTemp returns the address endpoint for the given protocol
+// and address.
//
// If none exists a temporary one may be created if we are in promiscuous mode
// or spoofing. Promiscuous mode will only be checked if promiscuous is true.
@@ -641,9 +292,8 @@ func (n *NIC) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.A
//
// If the address is the IPv4 broadcast address for an endpoint's network, that
// endpoint will be returned.
-func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, tempRef getRefBehaviour) *referencedNetworkEndpoint {
+func (n *NIC) getAddressOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, tempRef getAddressBehaviour) AssignableAddressEndpoint {
n.mu.RLock()
-
var spoofingOrPromiscuous bool
switch tempRef {
case spoofing:
@@ -651,274 +301,54 @@ func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address t
case promiscuous:
spoofingOrPromiscuous = n.mu.promiscuous
}
-
- if ref, ok := n.mu.endpoints[NetworkEndpointID{address}]; ok {
- // An endpoint with this id exists, check if it can be used and return it.
- if !ref.isAssignedRLocked(spoofingOrPromiscuous) {
- n.mu.RUnlock()
- return nil
- }
-
- if ref.tryIncRef() {
- n.mu.RUnlock()
- return ref
- }
- }
-
- if protocol == header.IPv4ProtocolNumber {
- if ref := n.getIPv4RefForBroadcastOrLoopbackRLocked(address); ref != nil {
- n.mu.RUnlock()
- return ref
- }
- }
n.mu.RUnlock()
-
- if !spoofingOrPromiscuous {
- return nil
- }
-
- // Try again with the lock in exclusive mode. If we still can't get the
- // endpoint, create a new "temporary" endpoint. It will only exist while
- // there's a route through it.
- n.mu.Lock()
- ref := n.getRefOrCreateTempLocked(protocol, address, peb)
- n.mu.Unlock()
- return ref
+ return n.getAddressOrCreateTempInner(protocol, address, spoofingOrPromiscuous, peb)
}
-// getRefForBroadcastOrLoopbackRLocked returns an endpoint whose address is the
-// broadcast address for the endpoint's network or an address in the endpoint's
-// subnet if the NIC is a loopback interface. This matches linux behaviour.
-//
-// n.mu MUST be read or write locked.
-func (n *NIC) getIPv4RefForBroadcastOrLoopbackRLocked(address tcpip.Address) *referencedNetworkEndpoint {
- for _, ref := range n.mu.endpoints {
- // Only IPv4 has a notion of broadcast addresses or considers the loopback
- // interface bound to an address's whole subnet (on linux).
- if ref.protocol != header.IPv4ProtocolNumber {
- continue
- }
-
- subnet := ref.addrWithPrefix().Subnet()
- if (subnet.IsBroadcast(address) || (n.isLoopback() && subnet.Contains(address))) && ref.isValidForOutgoingRLocked() && ref.tryIncRef() {
- return ref
- }
+// getAddressOrCreateTempInner is like getAddressEpOrCreateTemp except a boolean
+// is passed to indicate whether or not we should generate temporary endpoints.
+func (n *NIC) getAddressOrCreateTempInner(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, createTemp bool, peb PrimaryEndpointBehavior) AssignableAddressEndpoint {
+ if ep, ok := n.networkEndpoints[protocol]; ok {
+ return ep.AcquireAssignedAddress(address, createTemp, peb)
}
return nil
}
-/// getRefOrCreateTempLocked returns an existing endpoint for address or creates
-/// and returns a temporary endpoint.
-//
-// If the address is the IPv4 broadcast address for an endpoint's network, that
-// endpoint will be returned.
-//
-// n.mu must be write locked.
-func (n *NIC) getRefOrCreateTempLocked(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) *referencedNetworkEndpoint {
- if ref, ok := n.mu.endpoints[NetworkEndpointID{address}]; ok {
- // No need to check the type as we are ok with expired endpoints at this
- // point.
- if ref.tryIncRef() {
- return ref
- }
- // tryIncRef failing means the endpoint is scheduled to be removed once the
- // lock is released. Remove it here so we can create a new (temporary) one.
- // The removal logic waiting for the lock handles this case.
- n.removeEndpointLocked(ref)
- }
-
- if protocol == header.IPv4ProtocolNumber {
- if ref := n.getIPv4RefForBroadcastOrLoopbackRLocked(address); ref != nil {
- return ref
- }
- }
-
- // Add a new temporary endpoint.
- netProto, ok := n.stack.networkProtocols[protocol]
- if !ok {
- return nil
- }
- ref, _ := n.addAddressLocked(tcpip.ProtocolAddress{
- Protocol: protocol,
- AddressWithPrefix: tcpip.AddressWithPrefix{
- Address: address,
- PrefixLen: netProto.DefaultPrefixLen(),
- },
- }, peb, temporary, static, false)
- return ref
-}
-
-// addAddressLocked adds a new protocolAddress to n.
-//
-// If n already has the address in a non-permanent state, and the kind given is
-// permanent, that address will be promoted in place and its properties set to
-// the properties provided. Otherwise, it returns tcpip.ErrDuplicateAddress.
-func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior, kind networkEndpointKind, configType networkEndpointConfigType, deprecated bool) (*referencedNetworkEndpoint, *tcpip.Error) {
- // TODO(b/141022673): Validate IP addresses before adding them.
-
- // Sanity check.
- id := NetworkEndpointID{LocalAddress: protocolAddress.AddressWithPrefix.Address}
- if ref, ok := n.mu.endpoints[id]; ok {
- // Endpoint already exists.
- if kind != permanent {
- return nil, tcpip.ErrDuplicateAddress
- }
- switch ref.getKind() {
- case permanentTentative, permanent:
- // The NIC already have a permanent endpoint with that address.
- return nil, tcpip.ErrDuplicateAddress
- case permanentExpired, temporary:
- // Promote the endpoint to become permanent and respect the new peb,
- // configType and deprecated status.
- if ref.tryIncRef() {
- // TODO(b/147748385): Perform Duplicate Address Detection when promoting
- // an IPv6 endpoint to permanent.
- ref.setKind(permanent)
- ref.deprecated = deprecated
- ref.configType = configType
-
- refs := n.mu.primary[ref.protocol]
- for i, r := range refs {
- if r == ref {
- switch peb {
- case CanBePrimaryEndpoint:
- return ref, nil
- case FirstPrimaryEndpoint:
- if i == 0 {
- return ref, nil
- }
- n.mu.primary[r.protocol] = append(refs[:i], refs[i+1:]...)
- case NeverPrimaryEndpoint:
- n.mu.primary[r.protocol] = append(refs[:i], refs[i+1:]...)
- return ref, nil
- }
- }
- }
-
- n.insertPrimaryEndpointLocked(ref, peb)
-
- return ref, nil
- }
- // tryIncRef failing means the endpoint is scheduled to be removed once
- // the lock is released. Remove it here so we can create a new
- // (permanent) one. The removal logic waiting for the lock handles this
- // case.
- n.removeEndpointLocked(ref)
- }
- }
-
+// addAddress adds a new address to n, so that it starts accepting packets
+// targeted at the given address (and network protocol).
+func (n *NIC) addAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) *tcpip.Error {
ep, ok := n.networkEndpoints[protocolAddress.Protocol]
if !ok {
- return nil, tcpip.ErrUnknownProtocol
+ return tcpip.ErrUnknownProtocol
}
- isIPv6Unicast := protocolAddress.Protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(protocolAddress.AddressWithPrefix.Address)
-
- // If the address is an IPv6 address and it is a permanent address,
- // mark it as tentative so it goes through the DAD process if the NIC is
- // enabled. If the NIC is not enabled, DAD will be started when the NIC is
- // enabled.
- if isIPv6Unicast && kind == permanent {
- kind = permanentTentative
- }
-
- ref := &referencedNetworkEndpoint{
- refs: 1,
- addr: protocolAddress.AddressWithPrefix,
- ep: ep,
- nic: n,
- protocol: protocolAddress.Protocol,
- kind: kind,
- configType: configType,
- deprecated: deprecated,
- }
-
- // Set up resolver if link address resolution exists for this protocol.
- if n.linkEP.Capabilities()&CapabilityResolutionRequired != 0 {
- if linkRes, ok := n.stack.linkAddrResolvers[protocolAddress.Protocol]; ok {
- ref.linkCache = n.stack
- ref.linkRes = linkRes
- }
- }
-
- // If we are adding an IPv6 unicast address, join the solicited-node
- // multicast address.
- if isIPv6Unicast {
- snmc := header.SolicitedNodeAddr(protocolAddress.AddressWithPrefix.Address)
- if err := n.joinGroupLocked(protocolAddress.Protocol, snmc); err != nil {
- return nil, err
- }
- }
-
- n.mu.endpoints[id] = ref
-
- n.insertPrimaryEndpointLocked(ref, peb)
-
- // If we are adding a tentative IPv6 address, start DAD if the NIC is enabled.
- if isIPv6Unicast && kind == permanentTentative && n.mu.enabled {
- if err := n.mu.ndp.startDuplicateAddressDetection(protocolAddress.AddressWithPrefix.Address, ref); err != nil {
- return nil, err
- }
+ addressEndpoint, err := ep.AddAndAcquirePermanentAddress(protocolAddress.AddressWithPrefix, peb, AddressConfigStatic, false /* deprecated */)
+ if err == nil {
+ // We have no need for the address endpoint.
+ addressEndpoint.DecRef()
}
-
- return ref, nil
-}
-
-// AddAddress adds a new address to n, so that it starts accepting packets
-// targeted at the given address (and network protocol).
-func (n *NIC) AddAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) *tcpip.Error {
- // Add the endpoint.
- n.mu.Lock()
- _, err := n.addAddressLocked(protocolAddress, peb, permanent, static, false /* deprecated */)
- n.mu.Unlock()
-
return err
}
-// AllAddresses returns all addresses (primary and non-primary) associated with
+// allPermanentAddresses returns all permanent addresses associated with
// this NIC.
-func (n *NIC) AllAddresses() []tcpip.ProtocolAddress {
- n.mu.RLock()
- defer n.mu.RUnlock()
-
- addrs := make([]tcpip.ProtocolAddress, 0, len(n.mu.endpoints))
- for _, ref := range n.mu.endpoints {
- // Don't include tentative, expired or temporary endpoints to
- // avoid confusion and prevent the caller from using those.
- switch ref.getKind() {
- case permanentExpired, temporary:
- continue
+func (n *NIC) allPermanentAddresses() []tcpip.ProtocolAddress {
+ var addrs []tcpip.ProtocolAddress
+ for p, ep := range n.networkEndpoints {
+ for _, a := range ep.PermanentAddresses() {
+ addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a})
}
-
- addrs = append(addrs, tcpip.ProtocolAddress{
- Protocol: ref.protocol,
- AddressWithPrefix: ref.addrWithPrefix(),
- })
}
return addrs
}
-// PrimaryAddresses returns the primary addresses associated with this NIC.
-func (n *NIC) PrimaryAddresses() []tcpip.ProtocolAddress {
- n.mu.RLock()
- defer n.mu.RUnlock()
-
+// primaryAddresses returns the primary addresses associated with this NIC.
+func (n *NIC) primaryAddresses() []tcpip.ProtocolAddress {
var addrs []tcpip.ProtocolAddress
- for proto, list := range n.mu.primary {
- for _, ref := range list {
- // Don't include tentative, expired or tempory endpoints
- // to avoid confusion and prevent the caller from using
- // those.
- switch ref.getKind() {
- case permanentTentative, permanentExpired, temporary:
- continue
- }
-
- addrs = append(addrs, tcpip.ProtocolAddress{
- Protocol: proto,
- AddressWithPrefix: ref.addrWithPrefix(),
- })
+ for p, ep := range n.networkEndpoints {
+ for _, a := range ep.PrimaryAddresses() {
+ addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a})
}
}
return addrs
@@ -930,147 +360,26 @@ func (n *NIC) PrimaryAddresses() []tcpip.ProtocolAddress {
// address exists. If no non-deprecated address exists, the first deprecated
// address will be returned.
func (n *NIC) primaryAddress(proto tcpip.NetworkProtocolNumber) tcpip.AddressWithPrefix {
- n.mu.RLock()
- defer n.mu.RUnlock()
-
- list, ok := n.mu.primary[proto]
- if !ok {
+ addressEndpoint := n.primaryEndpoint(proto, "")
+ if addressEndpoint == nil {
return tcpip.AddressWithPrefix{}
}
-
- var deprecatedEndpoint *referencedNetworkEndpoint
- for _, ref := range list {
- // Don't include tentative, expired or tempory endpoints to avoid confusion
- // and prevent the caller from using those.
- switch ref.getKind() {
- case permanentTentative, permanentExpired, temporary:
- continue
- }
-
- if !ref.deprecated {
- return ref.addrWithPrefix()
- }
-
- if deprecatedEndpoint == nil {
- deprecatedEndpoint = ref
- }
- }
-
- if deprecatedEndpoint != nil {
- return deprecatedEndpoint.addrWithPrefix()
- }
-
- return tcpip.AddressWithPrefix{}
-}
-
-// insertPrimaryEndpointLocked adds r to n's primary endpoint list as required
-// by peb.
-//
-// n MUST be locked.
-func (n *NIC) insertPrimaryEndpointLocked(r *referencedNetworkEndpoint, peb PrimaryEndpointBehavior) {
- switch peb {
- case CanBePrimaryEndpoint:
- n.mu.primary[r.protocol] = append(n.mu.primary[r.protocol], r)
- case FirstPrimaryEndpoint:
- n.mu.primary[r.protocol] = append([]*referencedNetworkEndpoint{r}, n.mu.primary[r.protocol]...)
- }
+ addr := addressEndpoint.AddressWithPrefix()
+ addressEndpoint.DecRef()
+ return addr
}
-func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) {
- id := NetworkEndpointID{LocalAddress: r.address()}
-
- // Nothing to do if the reference has already been replaced with a different
- // one. This happens in the case where 1) this endpoint's ref count hit zero
- // and was waiting (on the lock) to be removed and 2) the same address was
- // re-added in the meantime by removing this endpoint from the list and
- // adding a new one.
- if n.mu.endpoints[id] != r {
- return
- }
-
- if r.getKind() == permanent {
- panic("Reference count dropped to zero before being removed")
- }
-
- delete(n.mu.endpoints, id)
- refs := n.mu.primary[r.protocol]
- for i, ref := range refs {
- if ref == r {
- n.mu.primary[r.protocol] = append(refs[:i], refs[i+1:]...)
- refs[len(refs)-1] = nil
- break
- }
- }
-}
-
-func (n *NIC) removeEndpoint(r *referencedNetworkEndpoint) {
- n.mu.Lock()
- n.removeEndpointLocked(r)
- n.mu.Unlock()
-}
-
-func (n *NIC) removePermanentAddressLocked(addr tcpip.Address) *tcpip.Error {
- r, ok := n.mu.endpoints[NetworkEndpointID{addr}]
- if !ok {
- return tcpip.ErrBadLocalAddress
- }
-
- kind := r.getKind()
- if kind != permanent && kind != permanentTentative {
- return tcpip.ErrBadLocalAddress
- }
-
- switch r.protocol {
- case header.IPv6ProtocolNumber:
- return n.removePermanentIPv6EndpointLocked(r, true /* allowSLAACInvalidation */)
- default:
- r.expireLocked()
- return nil
- }
-}
-
-func (n *NIC) removePermanentIPv6EndpointLocked(r *referencedNetworkEndpoint, allowSLAACInvalidation bool) *tcpip.Error {
- addr := r.addrWithPrefix()
-
- isIPv6Unicast := header.IsV6UnicastAddress(addr.Address)
-
- if isIPv6Unicast {
- n.mu.ndp.stopDuplicateAddressDetection(addr.Address)
-
- // If we are removing an address generated via SLAAC, cleanup
- // its SLAAC resources and notify the integrator.
- switch r.configType {
- case slaac:
- n.mu.ndp.cleanupSLAACAddrResourcesAndNotify(addr, allowSLAACInvalidation)
- case slaacTemp:
- n.mu.ndp.cleanupTempSLAACAddrResourcesAndNotify(addr, allowSLAACInvalidation)
- }
- }
-
- r.expireLocked()
-
- // At this point the endpoint is deleted.
-
- // If we are removing an IPv6 unicast address, leave the solicited-node
- // multicast address.
- //
- // We ignore the tcpip.ErrBadLocalAddress error because the solicited-node
- // multicast group may be left by user action.
- if isIPv6Unicast {
- snmc := header.SolicitedNodeAddr(addr.Address)
- if err := n.leaveGroupLocked(snmc, false /* force */); err != nil && err != tcpip.ErrBadLocalAddress {
+// removeAddress removes an address from n.
+func (n *NIC) removeAddress(addr tcpip.Address) *tcpip.Error {
+ for _, ep := range n.networkEndpoints {
+ if err := ep.RemovePermanentAddress(addr); err == tcpip.ErrBadLocalAddress {
+ continue
+ } else {
return err
}
}
- return nil
-}
-
-// RemoveAddress removes an address from n.
-func (n *NIC) RemoveAddress(addr tcpip.Address) *tcpip.Error {
- n.mu.Lock()
- defer n.mu.Unlock()
- return n.removePermanentAddressLocked(addr)
+ return tcpip.ErrBadLocalAddress
}
func (n *NIC) neighbors() ([]NeighborEntry, *tcpip.Error) {
@@ -1121,91 +430,66 @@ func (n *NIC) clearNeighbors() *tcpip.Error {
// joinGroup adds a new endpoint for the given multicast address, if none
// exists yet. Otherwise it just increments its count.
func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- return n.joinGroupLocked(protocol, addr)
-}
-
-// joinGroupLocked adds a new endpoint for the given multicast address, if none
-// exists yet. Otherwise it just increments its count. n MUST be locked before
-// joinGroupLocked is called.
-func (n *NIC) joinGroupLocked(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
// TODO(b/143102137): When implementing MLD, make sure MLD packets are
// not sent unless a valid link-local address is available for use on n
// as an MLD packet's source address must be a link-local address as
// outlined in RFC 3810 section 5.
- id := NetworkEndpointID{addr}
- joins := n.mu.mcastJoins[id]
- if joins == 0 {
- netProto, ok := n.stack.networkProtocols[protocol]
- if !ok {
- return tcpip.ErrUnknownProtocol
- }
- if _, err := n.addAddressLocked(tcpip.ProtocolAddress{
- Protocol: protocol,
- AddressWithPrefix: tcpip.AddressWithPrefix{
- Address: addr,
- PrefixLen: netProto.DefaultPrefixLen(),
- },
- }, NeverPrimaryEndpoint, permanent, static, false /* deprecated */); err != nil {
- return err
- }
+ ep, ok := n.networkEndpoints[protocol]
+ if !ok {
+ return tcpip.ErrNotSupported
}
- n.mu.mcastJoins[id] = joins + 1
- return nil
+
+ gep, ok := ep.(GroupAddressableEndpoint)
+ if !ok {
+ return tcpip.ErrNotSupported
+ }
+
+ _, err := gep.JoinGroup(addr)
+ return err
}
// leaveGroup decrements the count for the given multicast address, and when it
// reaches zero removes the endpoint for this address.
-func (n *NIC) leaveGroup(addr tcpip.Address) *tcpip.Error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- return n.leaveGroupLocked(addr, false /* force */)
-}
+func (n *NIC) leaveGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
+ ep, ok := n.networkEndpoints[protocol]
+ if !ok {
+ return tcpip.ErrNotSupported
+ }
-// leaveGroupLocked decrements the count for the given multicast address, and
-// when it reaches zero removes the endpoint for this address. n MUST be locked
-// before leaveGroupLocked is called.
-//
-// If force is true, then the count for the multicast addres is ignored and the
-// endpoint will be removed immediately.
-func (n *NIC) leaveGroupLocked(addr tcpip.Address, force bool) *tcpip.Error {
- id := NetworkEndpointID{addr}
- joins, ok := n.mu.mcastJoins[id]
+ gep, ok := ep.(GroupAddressableEndpoint)
if !ok {
- // There are no joins with this address on this NIC.
- return tcpip.ErrBadLocalAddress
+ return tcpip.ErrNotSupported
}
- joins--
- if force || joins == 0 {
- // There are no outstanding joins or we are forced to leave, clean up.
- delete(n.mu.mcastJoins, id)
- return n.removePermanentAddressLocked(addr)
+ if _, err := gep.LeaveGroup(addr); err != nil {
+ return err
}
- n.mu.mcastJoins[id] = joins
return nil
}
// isInGroup returns true if n has joined the multicast group addr.
func (n *NIC) isInGroup(addr tcpip.Address) bool {
- n.mu.RLock()
- joins := n.mu.mcastJoins[NetworkEndpointID{addr}]
- n.mu.RUnlock()
+ for _, ep := range n.networkEndpoints {
+ gep, ok := ep.(GroupAddressableEndpoint)
+ if !ok {
+ continue
+ }
- return joins != 0
+ if gep.IsInGroup(addr) {
+ return true
+ }
+ }
+
+ return false
}
-func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, pkt *PacketBuffer) {
- r := makeRoute(protocol, dst, src, localLinkAddr, ref, false /* handleLocal */, false /* multicastLoop */)
+func (n *NIC) handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, remotelinkAddr tcpip.LinkAddress, addressEndpoint AssignableAddressEndpoint, pkt *PacketBuffer) {
+ r := makeRoute(protocol, dst, src, n, addressEndpoint, false /* handleLocal */, false /* multicastLoop */)
r.RemoteLinkAddress = remotelinkAddr
-
- ref.ep.HandlePacket(&r, pkt)
- ref.decRef()
+ addressEndpoint.NetworkEndpoint().HandlePacket(&r, pkt)
+ addressEndpoint.DecRef()
}
// DeliverNetworkPacket finds the appropriate network protocol endpoint and
@@ -1216,7 +500,7 @@ func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address,
// the ownership of the items is not retained by the caller.
func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
n.mu.RLock()
- enabled := n.mu.enabled
+ enabled := n.Enabled()
// If the NIC is not yet enabled, don't receive any packets.
if !enabled {
n.mu.RUnlock()
@@ -1274,17 +558,21 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
src, dst := netProto.ParseAddresses(pkt.NetworkHeader().View())
- if n.stack.handleLocal && !n.isLoopback() && n.getRef(protocol, src) != nil {
- // The source address is one of our own, so we never should have gotten a
- // packet like this unless handleLocal is false. Loopback also calls this
- // function even though the packets didn't come from the physical interface
- // so don't drop those.
- n.stack.stats.IP.InvalidSourceAddressesReceived.Increment()
- return
+ if n.stack.handleLocal && !n.IsLoopback() {
+ if r := n.getAddress(protocol, src); r != nil {
+ r.DecRef()
+
+ // The source address is one of our own, so we never should have gotten a
+ // packet like this unless handleLocal is false. Loopback also calls this
+ // function even though the packets didn't come from the physical interface
+ // so don't drop those.
+ n.stack.stats.IP.InvalidSourceAddressesReceived.Increment()
+ return
+ }
}
// Loopback traffic skips the prerouting chain.
- if !n.isLoopback() {
+ if !n.IsLoopback() {
// iptables filtering.
ipt := n.stack.IPTables()
address := n.primaryAddress(protocol)
@@ -1295,8 +583,8 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
}
}
- if ref := n.getRef(protocol, dst); ref != nil {
- handlePacket(protocol, dst, src, n.linkEP.LinkAddress(), remote, ref, pkt)
+ if addressEndpoint := n.getAddress(protocol, dst); addressEndpoint != nil {
+ n.handlePacket(protocol, dst, src, remote, addressEndpoint, pkt)
return
}
@@ -1312,20 +600,20 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
}
// Found a NIC.
- n := r.ref.nic
- n.mu.RLock()
- ref, ok := n.mu.endpoints[NetworkEndpointID{dst}]
- ok = ok && ref.isValidForOutgoingRLocked() && ref.tryIncRef()
- n.mu.RUnlock()
- if ok {
- r.LocalLinkAddress = n.linkEP.LinkAddress()
- r.RemoteLinkAddress = remote
- r.RemoteAddress = src
- // TODO(b/123449044): Update the source NIC as well.
- ref.ep.HandlePacket(&r, pkt)
- ref.decRef()
- r.Release()
- return
+ n := r.nic
+ if addressEndpoint := n.getAddressOrCreateTempInner(protocol, dst, false, NeverPrimaryEndpoint); addressEndpoint != nil {
+ if n.isValidForOutgoing(addressEndpoint) {
+ r.LocalLinkAddress = n.linkEP.LinkAddress()
+ r.RemoteLinkAddress = remote
+ r.RemoteAddress = src
+ // TODO(b/123449044): Update the source NIC as well.
+ addressEndpoint.NetworkEndpoint().HandlePacket(&r, pkt)
+ addressEndpoint.DecRef()
+ r.Release()
+ return
+ }
+
+ addressEndpoint.DecRef()
}
// n doesn't have a destination endpoint.
@@ -1398,11 +686,13 @@ func (n *NIC) forwardPacket(r *Route, protocol tcpip.NetworkProtocolNumber, pkt
// DeliverTransportPacket delivers the packets to the appropriate transport
// protocol endpoint.
-func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) {
+func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) TransportPacketDisposition {
state, ok := n.stack.transportProtocols[protocol]
if !ok {
+ // TODO(gvisor.dev/issue/4365): Let the caller know that the transport
+ // protocol is unrecognized.
n.stack.stats.UnknownProtocolRcvdPackets.Increment()
- return
+ return TransportPacketHandled
}
transProto := state.proto
@@ -1423,59 +713,47 @@ func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolN
// we parse it using the minimum size.
if _, ok := pkt.TransportHeader().Consume(transProto.MinimumPacketSize()); !ok {
n.stack.stats.MalformedRcvdPackets.Increment()
- return
+ // We consider a malformed transport packet handled because there is
+ // nothing the caller can do.
+ return TransportPacketHandled
}
- } else {
- // This is either a bad packet or was re-assembled from fragments.
- transProto.Parse(pkt)
+ } else if !transProto.Parse(pkt) {
+ n.stack.stats.MalformedRcvdPackets.Increment()
+ return TransportPacketHandled
}
}
- if pkt.TransportHeader().View().Size() < transProto.MinimumPacketSize() {
- n.stack.stats.MalformedRcvdPackets.Increment()
- return
- }
-
srcPort, dstPort, err := transProto.ParsePorts(pkt.TransportHeader().View())
if err != nil {
n.stack.stats.MalformedRcvdPackets.Increment()
- return
+ return TransportPacketHandled
}
id := TransportEndpointID{dstPort, r.LocalAddress, srcPort, r.RemoteAddress}
if n.stack.demux.deliverPacket(r, protocol, pkt, id) {
- return
+ return TransportPacketHandled
}
// Try to deliver to per-stack default handler.
if state.defaultHandler != nil {
if state.defaultHandler(r, id, pkt) {
- return
+ return TransportPacketHandled
}
}
// We could not find an appropriate destination for this packet so
// give the protocol specific error handler a chance to handle it.
// If it doesn't handle it then we should do so.
- switch transProto.HandleUnknownDestinationPacket(r, id, pkt) {
+ switch res := transProto.HandleUnknownDestinationPacket(r, id, pkt); res {
case UnknownDestinationPacketMalformed:
n.stack.stats.MalformedRcvdPackets.Increment()
+ return TransportPacketHandled
case UnknownDestinationPacketUnhandled:
- // As per RFC: 1122 Section 3.2.2.1 A host SHOULD generate Destination
- // Unreachable messages with code:
- // 3 (Port Unreachable), when the designated transport protocol
- // (e.g., UDP) is unable to demultiplex the datagram but has no
- // protocol mechanism to inform the sender.
- np, ok := n.stack.networkProtocols[r.NetProto]
- if !ok {
- // For this to happen stack.makeRoute() must have been called with the
- // incorrect protocol number. Since we have successfully completed
- // network layer processing this should be impossible.
- panic(fmt.Sprintf("expected stack to have a NetworkProtocol for proto = %d", r.NetProto))
- }
-
- _ = np.ReturnError(r, &tcpip.ICMPReasonPortUnreachable{}, pkt)
+ return TransportPacketDestinationPortUnreachable
case UnknownDestinationPacketHandled:
+ return TransportPacketHandled
+ default:
+ panic(fmt.Sprintf("unrecognized result from HandleUnknownDestinationPacket = %d", res))
}
}
@@ -1508,96 +786,23 @@ func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcp
}
}
-// ID returns the identifier of n.
+// ID implements NetworkInterface.
func (n *NIC) ID() tcpip.NICID {
return n.id
}
-// Name returns the name of n.
+// Name implements NetworkInterface.
func (n *NIC) Name() string {
return n.name
}
-// Stack returns the instance of the Stack that owns this NIC.
-func (n *NIC) Stack() *Stack {
- return n.stack
-}
-
-// LinkEndpoint returns the link endpoint of n.
+// LinkEndpoint implements NetworkInterface.
func (n *NIC) LinkEndpoint() LinkEndpoint {
return n.linkEP
}
-// isAddrTentative returns true if addr is tentative on n.
-//
-// Note that if addr is not associated with n, then this function will return
-// false. It will only return true if the address is associated with the NIC
-// AND it is tentative.
-func (n *NIC) isAddrTentative(addr tcpip.Address) bool {
- n.mu.RLock()
- defer n.mu.RUnlock()
-
- ref, ok := n.mu.endpoints[NetworkEndpointID{addr}]
- if !ok {
- return false
- }
-
- return ref.getKind() == permanentTentative
-}
-
-// dupTentativeAddrDetected attempts to inform n that a tentative addr is a
-// duplicate on a link.
-//
-// dupTentativeAddrDetected will remove the tentative address if it exists. If
-// the address was generated via SLAAC, an attempt will be made to generate a
-// new address.
-func (n *NIC) dupTentativeAddrDetected(addr tcpip.Address) *tcpip.Error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- ref, ok := n.mu.endpoints[NetworkEndpointID{addr}]
- if !ok {
- return tcpip.ErrBadAddress
- }
-
- if ref.getKind() != permanentTentative {
- return tcpip.ErrInvalidEndpointState
- }
-
- // If the address is a SLAAC address, do not invalidate its SLAAC prefix as a
- // new address will be generated for it.
- if err := n.removePermanentIPv6EndpointLocked(ref, false /* allowSLAACInvalidation */); err != nil {
- return err
- }
-
- prefix := ref.addrWithPrefix().Subnet()
-
- switch ref.configType {
- case slaac:
- n.mu.ndp.regenerateSLAACAddr(prefix)
- case slaacTemp:
- // Do not reset the generation attempts counter for the prefix as the
- // temporary address is being regenerated in response to a DAD conflict.
- n.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */)
- }
-
- return nil
-}
-
-// setNDPConfigs sets the NDP configurations for n.
-//
-// Note, if c contains invalid NDP configuration values, it will be fixed to
-// use default values for the erroneous values.
-func (n *NIC) setNDPConfigs(c NDPConfigurations) {
- c.validate()
-
- n.mu.Lock()
- n.mu.ndp.configs = c
- n.mu.Unlock()
-}
-
-// NUDConfigs gets the NUD configurations for n.
-func (n *NIC) NUDConfigs() (NUDConfigurations, *tcpip.Error) {
+// nudConfigs gets the NUD configurations for n.
+func (n *NIC) nudConfigs() (NUDConfigurations, *tcpip.Error) {
if n.neigh == nil {
return NUDConfigurations{}, tcpip.ErrNotSupported
}
@@ -1617,49 +822,6 @@ func (n *NIC) setNUDConfigs(c NUDConfigurations) *tcpip.Error {
return nil
}
-// handleNDPRA handles an NDP Router Advertisement message that arrived on n.
-func (n *NIC) handleNDPRA(ip tcpip.Address, ra header.NDPRouterAdvert) {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- n.mu.ndp.handleRA(ip, ra)
-}
-
-type networkEndpointKind int32
-
-const (
- // A permanentTentative endpoint is a permanent address that is not yet
- // considered to be fully bound to an interface in the traditional
- // sense. That is, the address is associated with a NIC, but packets
- // destined to the address MUST NOT be accepted and MUST be silently
- // dropped, and the address MUST NOT be used as a source address for
- // outgoing packets. For IPv6, addresses will be of this kind until
- // NDP's Duplicate Address Detection has resolved, or be deleted if
- // the process results in detecting a duplicate address.
- permanentTentative networkEndpointKind = iota
-
- // A permanent endpoint is created by adding a permanent address (vs. a
- // temporary one) to the NIC. Its reference count is biased by 1 to avoid
- // removal when no route holds a reference to it. It is removed by explicitly
- // removing the permanent address from the NIC.
- permanent
-
- // An expired permanent endpoint is a permanent endpoint that had its address
- // removed from the NIC, and it is waiting to be removed once no more routes
- // hold a reference to it. This is achieved by decreasing its reference count
- // by 1. If its address is re-added before the endpoint is removed, its type
- // changes back to permanent and its reference count increases by 1 again.
- permanentExpired
-
- // A temporary endpoint is created for spoofing outgoing packets, or when in
- // promiscuous mode and accepting incoming packets that don't match any
- // permanent endpoint. Its reference count is not biased by 1 and the
- // endpoint is removed immediately when no more route holds a reference to
- // it. A temporary endpoint can be promoted to permanent if its address
- // is added permanently.
- temporary
-)
-
func (n *NIC) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) *tcpip.Error {
n.mu.Lock()
defer n.mu.Unlock()
@@ -1690,153 +852,12 @@ func (n *NIC) unregisterPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep
}
}
-type networkEndpointConfigType int32
-
-const (
- // A statically configured endpoint is an address that was added by
- // some user-specified action (adding an explicit address, joining a
- // multicast group).
- static networkEndpointConfigType = iota
-
- // A SLAAC configured endpoint is an IPv6 endpoint that was added by
- // SLAAC as per RFC 4862 section 5.5.3.
- slaac
-
- // A temporary SLAAC configured endpoint is an IPv6 endpoint that was added by
- // SLAAC as per RFC 4941. Temporary SLAAC addresses are short-lived and are
- // not expected to be valid (or preferred) forever; hence the term temporary.
- slaacTemp
-)
-
-type referencedNetworkEndpoint struct {
- ep NetworkEndpoint
- addr tcpip.AddressWithPrefix
- nic *NIC
- protocol tcpip.NetworkProtocolNumber
-
- // linkCache is set if link address resolution is enabled for this
- // protocol. Set to nil otherwise.
- linkCache LinkAddressCache
-
- // linkRes is set if link address resolution is enabled for this protocol.
- // Set to nil otherwise.
- linkRes LinkAddressResolver
-
- // refs is counting references held for this endpoint. When refs hits zero it
- // triggers the automatic removal of the endpoint from the NIC.
- refs int32
-
- // networkEndpointKind must only be accessed using {get,set}Kind().
- kind networkEndpointKind
-
- // configType is the method that was used to configure this endpoint.
- // This must never change except during endpoint creation and promotion to
- // permanent.
- configType networkEndpointConfigType
-
- // deprecated indicates whether or not the endpoint should be considered
- // deprecated. That is, when deprecated is true, other endpoints that are not
- // deprecated should be preferred.
- deprecated bool
-}
-
-func (r *referencedNetworkEndpoint) address() tcpip.Address {
- return r.addr.Address
-}
-
-func (r *referencedNetworkEndpoint) addrWithPrefix() tcpip.AddressWithPrefix {
- return r.addr
-}
-
-func (r *referencedNetworkEndpoint) getKind() networkEndpointKind {
- return networkEndpointKind(atomic.LoadInt32((*int32)(&r.kind)))
-}
-
-func (r *referencedNetworkEndpoint) setKind(kind networkEndpointKind) {
- atomic.StoreInt32((*int32)(&r.kind), int32(kind))
-}
-
// isValidForOutgoing returns true if the endpoint can be used to send out a
// packet. It requires the endpoint to not be marked expired (i.e., its address)
// has been removed) unless the NIC is in spoofing mode, or temporary.
-func (r *referencedNetworkEndpoint) isValidForOutgoing() bool {
- r.nic.mu.RLock()
- defer r.nic.mu.RUnlock()
-
- return r.isValidForOutgoingRLocked()
-}
-
-// isValidForOutgoingRLocked is the same as isValidForOutgoing but requires
-// r.nic.mu to be read locked.
-func (r *referencedNetworkEndpoint) isValidForOutgoingRLocked() bool {
- if !r.nic.mu.enabled {
- return false
- }
-
- return r.isAssignedRLocked(r.nic.mu.spoofing)
-}
-
-// isAssignedRLocked returns true if r is considered to be assigned to the NIC.
-//
-// r.nic.mu must be read locked.
-func (r *referencedNetworkEndpoint) isAssignedRLocked(spoofingOrPromiscuous bool) bool {
- switch r.getKind() {
- case permanentTentative:
- return false
- case permanentExpired:
- return spoofingOrPromiscuous
- default:
- return true
- }
-}
-
-// expireLocked decrements the reference count and marks the permanent endpoint
-// as expired.
-func (r *referencedNetworkEndpoint) expireLocked() {
- r.setKind(permanentExpired)
- r.decRefLocked()
-}
-
-// decRef decrements the ref count and cleans up the endpoint once it reaches
-// zero.
-func (r *referencedNetworkEndpoint) decRef() {
- if atomic.AddInt32(&r.refs, -1) == 0 {
- r.nic.removeEndpoint(r)
- }
-}
-
-// decRefLocked is the same as decRef but assumes that the NIC.mu mutex is
-// locked.
-func (r *referencedNetworkEndpoint) decRefLocked() {
- if atomic.AddInt32(&r.refs, -1) == 0 {
- r.nic.removeEndpointLocked(r)
- }
-}
-
-// incRef increments the ref count. It must only be called when the caller is
-// known to be holding a reference to the endpoint, otherwise tryIncRef should
-// be used.
-func (r *referencedNetworkEndpoint) incRef() {
- atomic.AddInt32(&r.refs, 1)
-}
-
-// tryIncRef attempts to increment the ref count from n to n+1, but only if n is
-// not zero. That is, it will increment the count if the endpoint is still
-// alive, and do nothing if it has already been clean up.
-func (r *referencedNetworkEndpoint) tryIncRef() bool {
- for {
- v := atomic.LoadInt32(&r.refs)
- if v == 0 {
- return false
- }
-
- if atomic.CompareAndSwapInt32(&r.refs, v, v+1) {
- return true
- }
- }
-}
-
-// stack returns the Stack instance that owns the underlying endpoint.
-func (r *referencedNetworkEndpoint) stack() *Stack {
- return r.nic.stack
+func (n *NIC) isValidForOutgoing(ep AssignableAddressEndpoint) bool {
+ n.mu.RLock()
+ spoofing := n.mu.spoofing
+ n.mu.RUnlock()
+ return n.Enabled() && ep.IsAssigned(spoofing)
}
diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go
index ef6e63b3e..fdd49b77f 100644
--- a/pkg/tcpip/stack/nic_test.go
+++ b/pkg/tcpip/stack/nic_test.go
@@ -15,96 +15,40 @@
package stack
import (
- "math"
"testing"
- "time"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
)
-var _ LinkEndpoint = (*testLinkEndpoint)(nil)
+var _ AddressableEndpoint = (*testIPv6Endpoint)(nil)
+var _ NetworkEndpoint = (*testIPv6Endpoint)(nil)
+var _ NDPEndpoint = (*testIPv6Endpoint)(nil)
-// A LinkEndpoint that throws away outgoing packets.
+// An IPv6 NetworkEndpoint that throws away outgoing packets.
//
-// We use this instead of the channel endpoint as the channel package depends on
+// We use this instead of ipv6.endpoint because the ipv6 package depends on
// the stack package which this test lives in, causing a cyclic dependency.
-type testLinkEndpoint struct {
- dispatcher NetworkDispatcher
-}
-
-// Attach implements LinkEndpoint.Attach.
-func (e *testLinkEndpoint) Attach(dispatcher NetworkDispatcher) {
- e.dispatcher = dispatcher
-}
-
-// IsAttached implements LinkEndpoint.IsAttached.
-func (e *testLinkEndpoint) IsAttached() bool {
- return e.dispatcher != nil
-}
-
-// MTU implements LinkEndpoint.MTU.
-func (*testLinkEndpoint) MTU() uint32 {
- return math.MaxUint16
-}
-
-// Capabilities implements LinkEndpoint.Capabilities.
-func (*testLinkEndpoint) Capabilities() LinkEndpointCapabilities {
- return CapabilityResolutionRequired
-}
+type testIPv6Endpoint struct {
+ AddressableEndpointState
-// MaxHeaderLength implements LinkEndpoint.MaxHeaderLength.
-func (*testLinkEndpoint) MaxHeaderLength() uint16 {
- return 0
-}
+ nicID tcpip.NICID
+ linkEP LinkEndpoint
+ protocol *testIPv6Protocol
-// LinkAddress returns the link address of this endpoint.
-func (*testLinkEndpoint) LinkAddress() tcpip.LinkAddress {
- return ""
+ invalidatedRtr tcpip.Address
}
-// Wait implements LinkEndpoint.Wait.
-func (*testLinkEndpoint) Wait() {}
-
-// WritePacket implements LinkEndpoint.WritePacket.
-func (e *testLinkEndpoint) WritePacket(*Route, *GSO, tcpip.NetworkProtocolNumber, *PacketBuffer) *tcpip.Error {
+func (*testIPv6Endpoint) Enable() *tcpip.Error {
return nil
}
-// WritePackets implements LinkEndpoint.WritePackets.
-func (e *testLinkEndpoint) WritePackets(*Route, *GSO, PacketBufferList, tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
- // Our tests don't use this so we don't support it.
- return 0, tcpip.ErrNotSupported
-}
-
-// WriteRawPacket implements LinkEndpoint.WriteRawPacket.
-func (e *testLinkEndpoint) WriteRawPacket(buffer.VectorisedView) *tcpip.Error {
- // Our tests don't use this so we don't support it.
- return tcpip.ErrNotSupported
+func (*testIPv6Endpoint) Enabled() bool {
+ return true
}
-// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
-func (*testLinkEndpoint) ARPHardwareType() header.ARPHardwareType {
- panic("not implemented")
-}
-
-// AddHeader implements stack.LinkEndpoint.AddHeader.
-func (e *testLinkEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
- panic("not implemented")
-}
-
-var _ NetworkEndpoint = (*testIPv6Endpoint)(nil)
-
-// An IPv6 NetworkEndpoint that throws away outgoing packets.
-//
-// We use this instead of ipv6.endpoint because the ipv6 package depends on
-// the stack package which this test lives in, causing a cyclic dependency.
-type testIPv6Endpoint struct {
- nicID tcpip.NICID
- linkEP LinkEndpoint
- protocol *testIPv6Protocol
-}
+func (*testIPv6Endpoint) Disable() {}
// DefaultTTL implements NetworkEndpoint.DefaultTTL.
func (*testIPv6Endpoint) DefaultTTL() uint8 {
@@ -116,11 +60,6 @@ func (e *testIPv6Endpoint) MTU() uint32 {
return e.linkEP.MTU() - header.IPv6MinimumSize
}
-// Capabilities implements NetworkEndpoint.Capabilities.
-func (e *testIPv6Endpoint) Capabilities() LinkEndpointCapabilities {
- return e.linkEP.Capabilities()
-}
-
// MaxHeaderLength implements NetworkEndpoint.MaxHeaderLength.
func (e *testIPv6Endpoint) MaxHeaderLength() uint16 {
return e.linkEP.MaxHeaderLength() + header.IPv6MinimumSize
@@ -144,23 +83,24 @@ func (*testIPv6Endpoint) WriteHeaderIncludedPacket(*Route, *PacketBuffer) *tcpip
return tcpip.ErrNotSupported
}
-// NICID implements NetworkEndpoint.NICID.
-func (e *testIPv6Endpoint) NICID() tcpip.NICID {
- return e.nicID
-}
-
// HandlePacket implements NetworkEndpoint.HandlePacket.
func (*testIPv6Endpoint) HandlePacket(*Route, *PacketBuffer) {
}
// Close implements NetworkEndpoint.Close.
-func (*testIPv6Endpoint) Close() {}
+func (e *testIPv6Endpoint) Close() {
+ e.AddressableEndpointState.Cleanup()
+}
// NetworkProtocolNumber implements NetworkEndpoint.NetworkProtocolNumber.
func (*testIPv6Endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
return header.IPv6ProtocolNumber
}
+func (e *testIPv6Endpoint) InvalidateDefaultRouter(rtr tcpip.Address) {
+ e.invalidatedRtr = rtr
+}
+
var _ NetworkProtocol = (*testIPv6Protocol)(nil)
// An IPv6 NetworkProtocol that supports the bare minimum to make a stack
@@ -192,12 +132,14 @@ func (*testIPv6Protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address)
}
// NewEndpoint implements NetworkProtocol.NewEndpoint.
-func (p *testIPv6Protocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCache, _ NUDHandler, _ TransportDispatcher, linkEP LinkEndpoint, _ *Stack) NetworkEndpoint {
- return &testIPv6Endpoint{
- nicID: nicID,
- linkEP: linkEP,
+func (p *testIPv6Protocol) NewEndpoint(nic NetworkInterface, _ LinkAddressCache, _ NUDHandler, _ TransportDispatcher) NetworkEndpoint {
+ e := &testIPv6Endpoint{
+ nicID: nic.ID(),
+ linkEP: nic.LinkEndpoint(),
protocol: p,
}
+ e.AddressableEndpointState.Init(e)
+ return e
}
// SetOption implements NetworkProtocol.SetOption.
@@ -221,11 +163,6 @@ func (*testIPv6Protocol) Parse(*PacketBuffer) (tcpip.TransportProtocolNumber, bo
return 0, false, false
}
-// ReturnError implements NetworkProtocol.ReturnError.
-func (*testIPv6Protocol) ReturnError(*Route, tcpip.ICMPReason, *PacketBuffer) *tcpip.Error {
- return nil
-}
-
var _ LinkAddressResolver = (*testIPv6Protocol)(nil)
// LinkAddressProtocol implements LinkAddressResolver.
@@ -246,38 +183,6 @@ func (*testIPv6Protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAdd
return "", false
}
-// Test the race condition where a NIC is removed and an RS timer fires at the
-// same time.
-func TestRemoveNICWhileHandlingRSTimer(t *testing.T) {
- const (
- nicID = 1
-
- maxRtrSolicitations = 5
- )
-
- e := testLinkEndpoint{}
- s := New(Options{
- NetworkProtocols: []NetworkProtocol{&testIPv6Protocol{}},
- NDPConfigs: NDPConfigurations{
- MaxRtrSolicitations: maxRtrSolicitations,
- RtrSolicitationInterval: minimumRtrSolicitationInterval,
- },
- })
-
- if err := s.CreateNIC(nicID, &e); err != nil {
- t.Fatalf("s.CreateNIC(%d, _) = %s", nicID, err)
- }
-
- s.mu.Lock()
- // Wait for the router solicitation timer to fire and block trying to obtain
- // the stack lock when doing link address resolution.
- time.Sleep(minimumRtrSolicitationInterval * 2)
- if err := s.removeNICLocked(nicID); err != nil {
- t.Fatalf("s.removeNICLocked(%d) = %s", nicID, err)
- }
- s.mu.Unlock()
-}
-
func TestDisabledRxStatsWhenNICDisabled(t *testing.T) {
// When the NIC is disabled, the only field that matters is the stats field.
// This test is limited to stats counter checks.
diff --git a/pkg/tcpip/stack/nud_test.go b/pkg/tcpip/stack/nud_test.go
index 2b97e5972..8cffb9fc6 100644
--- a/pkg/tcpip/stack/nud_test.go
+++ b/pkg/tcpip/stack/nud_test.go
@@ -60,7 +60,7 @@ func TestSetNUDConfigurationFailsForBadNICID(t *testing.T) {
// A neighbor cache is required to store NUDConfigurations. The networking
// stack will only allocate neighbor caches if a protocol providing link
// address resolution is specified (e.g. ARP or IPv6).
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
UseNeighborCache: true,
})
@@ -137,7 +137,7 @@ func TestDefaultNUDConfigurations(t *testing.T) {
// A neighbor cache is required to store NUDConfigurations. The networking
// stack will only allocate neighbor caches if a protocol providing link
// address resolution is specified (e.g. ARP or IPv6).
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
NUDConfigs: stack.DefaultNUDConfigurations(),
UseNeighborCache: true,
})
@@ -192,7 +192,7 @@ func TestNUDConfigurationsBaseReachableTime(t *testing.T) {
// A neighbor cache is required to store NUDConfigurations. The
// networking stack will only allocate neighbor caches if a protocol
// providing link address resolution is specified (e.g. ARP or IPv6).
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
NUDConfigs: c,
UseNeighborCache: true,
})
@@ -249,7 +249,7 @@ func TestNUDConfigurationsMinRandomFactor(t *testing.T) {
// A neighbor cache is required to store NUDConfigurations. The
// networking stack will only allocate neighbor caches if a protocol
// providing link address resolution is specified (e.g. ARP or IPv6).
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
NUDConfigs: c,
UseNeighborCache: true,
})
@@ -329,7 +329,7 @@ func TestNUDConfigurationsMaxRandomFactor(t *testing.T) {
// A neighbor cache is required to store NUDConfigurations. The
// networking stack will only allocate neighbor caches if a protocol
// providing link address resolution is specified (e.g. ARP or IPv6).
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
NUDConfigs: c,
UseNeighborCache: true,
})
@@ -391,7 +391,7 @@ func TestNUDConfigurationsRetransmitTimer(t *testing.T) {
// A neighbor cache is required to store NUDConfigurations. The
// networking stack will only allocate neighbor caches if a protocol
// providing link address resolution is specified (e.g. ARP or IPv6).
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
NUDConfigs: c,
UseNeighborCache: true,
})
@@ -443,7 +443,7 @@ func TestNUDConfigurationsDelayFirstProbeTime(t *testing.T) {
// A neighbor cache is required to store NUDConfigurations. The
// networking stack will only allocate neighbor caches if a protocol
// providing link address resolution is specified (e.g. ARP or IPv6).
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
NUDConfigs: c,
UseNeighborCache: true,
})
@@ -495,7 +495,7 @@ func TestNUDConfigurationsMaxMulticastProbes(t *testing.T) {
// A neighbor cache is required to store NUDConfigurations. The
// networking stack will only allocate neighbor caches if a protocol
// providing link address resolution is specified (e.g. ARP or IPv6).
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
NUDConfigs: c,
UseNeighborCache: true,
})
@@ -547,7 +547,7 @@ func TestNUDConfigurationsMaxUnicastProbes(t *testing.T) {
// A neighbor cache is required to store NUDConfigurations. The
// networking stack will only allocate neighbor caches if a protocol
// providing link address resolution is specified (e.g. ARP or IPv6).
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
NUDConfigs: c,
UseNeighborCache: true,
})
@@ -599,7 +599,7 @@ func TestNUDConfigurationsUnreachableTime(t *testing.T) {
// A neighbor cache is required to store NUDConfigurations. The
// networking stack will only allocate neighbor caches if a protocol
// providing link address resolution is specified (e.g. ARP or IPv6).
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
NUDConfigs: c,
UseNeighborCache: true,
})
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index 77640cd8a..567e1904e 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -15,6 +15,8 @@
package stack
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/sleep"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
@@ -152,10 +154,10 @@ type TransportProtocol interface {
Number() tcpip.TransportProtocolNumber
// NewEndpoint creates a new endpoint of the transport protocol.
- NewEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
+ NewEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
// NewRawEndpoint creates a new raw endpoint of the transport protocol.
- NewRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
+ NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
// MinimumPacketSize returns the minimum valid packet size of this
// transport protocol. The stack automatically drops any packets smaller
@@ -197,6 +199,21 @@ type TransportProtocol interface {
Parse(pkt *PacketBuffer) (ok bool)
}
+// TransportPacketDisposition is the result from attempting to deliver a packet
+// to the transport layer.
+type TransportPacketDisposition int
+
+const (
+ // TransportPacketHandled indicates that a transport packet was handled by the
+ // transport layer and callers need not take any further action.
+ TransportPacketHandled TransportPacketDisposition = iota
+
+ // TransportPacketDestinationPortUnreachable indicates that there weren't any
+ // listeners interested in the packet and the transport protocol has no means
+ // to notify the sender.
+ TransportPacketDestinationPortUnreachable
+)
+
// TransportDispatcher contains the methods used by the network stack to deliver
// packets to the appropriate transport endpoint after it has been handled by
// the network layer.
@@ -207,7 +224,7 @@ type TransportDispatcher interface {
// pkt.NetworkHeader must be set before calling DeliverTransportPacket.
//
// DeliverTransportPacket takes ownership of pkt.
- DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer)
+ DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) TransportPacketDisposition
// DeliverTransportControlPacket delivers control packets to the
// appropriate transport protocol endpoint.
@@ -244,9 +261,254 @@ type NetworkHeaderParams struct {
TOS uint8
}
+// GroupAddressableEndpoint is an endpoint that supports group addressing.
+//
+// An endpoint is considered to support group addressing when one or more
+// endpoints may associate themselves with the same identifier (group address).
+type GroupAddressableEndpoint interface {
+ // JoinGroup joins the spcified group.
+ //
+ // Returns true if the group was newly joined.
+ JoinGroup(group tcpip.Address) (bool, *tcpip.Error)
+
+ // LeaveGroup attempts to leave the specified group.
+ //
+ // Returns tcpip.ErrBadLocalAddress if the endpoint has not joined the group.
+ LeaveGroup(group tcpip.Address) (bool, *tcpip.Error)
+
+ // IsInGroup returns true if the endpoint is a member of the specified group.
+ IsInGroup(group tcpip.Address) bool
+}
+
+// PrimaryEndpointBehavior is an enumeration of an AddressEndpoint's primary
+// behavior.
+type PrimaryEndpointBehavior int
+
+const (
+ // CanBePrimaryEndpoint indicates the endpoint can be used as a primary
+ // endpoint for new connections with no local address. This is the
+ // default when calling NIC.AddAddress.
+ CanBePrimaryEndpoint PrimaryEndpointBehavior = iota
+
+ // FirstPrimaryEndpoint indicates the endpoint should be the first
+ // primary endpoint considered. If there are multiple endpoints with
+ // this behavior, they are ordered by recency.
+ FirstPrimaryEndpoint
+
+ // NeverPrimaryEndpoint indicates the endpoint should never be a
+ // primary endpoint.
+ NeverPrimaryEndpoint
+)
+
+// AddressConfigType is the method used to add an address.
+type AddressConfigType int
+
+const (
+ // AddressConfigStatic is a statically configured address endpoint that was
+ // added by some user-specified action (adding an explicit address, joining a
+ // multicast group).
+ AddressConfigStatic AddressConfigType = iota
+
+ // AddressConfigSlaac is an address endpoint added by SLAAC, as per RFC 4862
+ // section 5.5.3.
+ AddressConfigSlaac
+
+ // AddressConfigSlaacTemp is a temporary address endpoint added by SLAAC as
+ // per RFC 4941. Temporary SLAAC addresses are short-lived and are not
+ // to be valid (or preferred) forever; hence the term temporary.
+ AddressConfigSlaacTemp
+)
+
+// AssignableAddressEndpoint is a reference counted address endpoint that may be
+// assigned to a NetworkEndpoint.
+type AssignableAddressEndpoint interface {
+ // NetworkEndpoint returns the NetworkEndpoint the receiver is associated
+ // with.
+ NetworkEndpoint() NetworkEndpoint
+
+ // AddressWithPrefix returns the endpoint's address.
+ AddressWithPrefix() tcpip.AddressWithPrefix
+
+ // IsAssigned returns whether or not the endpoint is considered bound
+ // to its NetworkEndpoint.
+ IsAssigned(allowExpired bool) bool
+
+ // IncRef increments this endpoint's reference count.
+ //
+ // Returns true if it was successfully incremented. If it returns false, then
+ // the endpoint is considered expired and should no longer be used.
+ IncRef() bool
+
+ // DecRef decrements this endpoint's reference count.
+ DecRef()
+}
+
+// AddressEndpoint is an endpoint representing an address assigned to an
+// AddressableEndpoint.
+type AddressEndpoint interface {
+ AssignableAddressEndpoint
+
+ // GetKind returns the address kind for this endpoint.
+ GetKind() AddressKind
+
+ // SetKind sets the address kind for this endpoint.
+ SetKind(AddressKind)
+
+ // ConfigType returns the method used to add the address.
+ ConfigType() AddressConfigType
+
+ // Deprecated returns whether or not this endpoint is deprecated.
+ Deprecated() bool
+
+ // SetDeprecated sets this endpoint's deprecated status.
+ SetDeprecated(bool)
+}
+
+// AddressKind is the kind of of an address.
+//
+// See the values of AddressKind for more details.
+type AddressKind int
+
+const (
+ // PermanentTentative is a permanent address endpoint that is not yet
+ // considered to be fully bound to an interface in the traditional
+ // sense. That is, the address is associated with a NIC, but packets
+ // destined to the address MUST NOT be accepted and MUST be silently
+ // dropped, and the address MUST NOT be used as a source address for
+ // outgoing packets. For IPv6, addresses are of this kind until NDP's
+ // Duplicate Address Detection (DAD) resolves. If DAD fails, the address
+ // is removed.
+ PermanentTentative AddressKind = iota
+
+ // Permanent is a permanent endpoint (vs. a temporary one) assigned to the
+ // NIC. Its reference count is biased by 1 to avoid removal when no route
+ // holds a reference to it. It is removed by explicitly removing the address
+ // from the NIC.
+ Permanent
+
+ // PermanentExpired is a permanent endpoint that had its address removed from
+ // the NIC, and it is waiting to be removed once no references to it are held.
+ //
+ // If the address is re-added before the endpoint is removed, its type
+ // changes back to Permanent.
+ PermanentExpired
+
+ // Temporary is an endpoint, created on a one-off basis to temporarily
+ // consider the NIC bound an an address that it is not explictiy bound to
+ // (such as a permanent address). Its reference count must not be biased by 1
+ // so that the address is removed immediately when references to it are no
+ // longer held.
+ //
+ // A temporary endpoint may be promoted to permanent if the address is added
+ // permanently.
+ Temporary
+)
+
+// IsPermanent returns true if the AddressKind represents a permanent address.
+func (k AddressKind) IsPermanent() bool {
+ switch k {
+ case Permanent, PermanentTentative:
+ return true
+ case Temporary, PermanentExpired:
+ return false
+ default:
+ panic(fmt.Sprintf("unrecognized address kind = %d", k))
+ }
+}
+
+// AddressableEndpoint is an endpoint that supports addressing.
+//
+// An endpoint is considered to support addressing when the endpoint may
+// associate itself with an identifier (address).
+type AddressableEndpoint interface {
+ // AddAndAcquirePermanentAddress adds the passed permanent address.
+ //
+ // Returns tcpip.ErrDuplicateAddress if the address exists.
+ //
+ // Acquires and returns the AddressEndpoint for the added address.
+ AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated bool) (AddressEndpoint, *tcpip.Error)
+
+ // RemovePermanentAddress removes the passed address if it is a permanent
+ // address.
+ //
+ // Returns tcpip.ErrBadLocalAddress if the endpoint does not have the passed
+ // permanent address.
+ RemovePermanentAddress(addr tcpip.Address) *tcpip.Error
+
+ // AcquireAssignedAddress returns an AddressEndpoint for the passed address
+ // that is considered bound to the endpoint, optionally creating a temporary
+ // endpoint if requested and no existing address exists.
+ //
+ // The returned endpoint's reference count is incremented.
+ //
+ // Returns nil if the specified address is not local to this endpoint.
+ AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB PrimaryEndpointBehavior) AddressEndpoint
+
+ // AcquirePrimaryAddress returns a primary endpoint to use when communicating
+ // with the passed remote address.
+ //
+ // If allowExpired is true, expired addresses may be returned.
+ //
+ // The returned endpoint's reference count is incremented.
+ //
+ // Returns nil if a primary endpoint is not available.
+ AcquirePrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) AddressEndpoint
+
+ // PrimaryAddresses returns the primary addresses.
+ PrimaryAddresses() []tcpip.AddressWithPrefix
+
+ // PermanentAddresses returns all the permanent addresses.
+ PermanentAddresses() []tcpip.AddressWithPrefix
+}
+
+// NDPEndpoint is a network endpoint that supports NDP.
+type NDPEndpoint interface {
+ NetworkEndpoint
+
+ // InvalidateDefaultRouter invalidates a default router discovered through
+ // NDP.
+ InvalidateDefaultRouter(tcpip.Address)
+}
+
+// NetworkInterface is a network interface.
+type NetworkInterface interface {
+ // ID returns the interface's ID.
+ ID() tcpip.NICID
+
+ // IsLoopback returns true if the interface is a loopback interface.
+ IsLoopback() bool
+
+ // Name returns the name of the interface.
+ //
+ // May return an empty string if the interface is not configured with a name.
+ Name() string
+
+ // Enabled returns true if the interface is enabled.
+ Enabled() bool
+
+ // LinkEndpoint returns the link endpoint backing the interface.
+ LinkEndpoint() LinkEndpoint
+}
+
// NetworkEndpoint is the interface that needs to be implemented by endpoints
// of network layer protocols (e.g., ipv4, ipv6).
type NetworkEndpoint interface {
+ AddressableEndpoint
+
+ // Enable enables the endpoint.
+ //
+ // Must only be called when the stack is in a state that allows the endpoint
+ // to send and receive packets.
+ //
+ // Returns tcpip.ErrNotPermitted if the endpoint cannot be enabled.
+ Enable() *tcpip.Error
+
+ // Enabled returns true if the endpoint is enabled.
+ Enabled() bool
+
+ // Disable disables the endpoint.
+ Disable()
+
// DefaultTTL is the default time-to-live value (or hop limit, in ipv6)
// for this endpoint.
DefaultTTL() uint8
@@ -256,10 +518,6 @@ type NetworkEndpoint interface {
// minus the network endpoint max header length.
MTU() uint32
- // Capabilities returns the set of capabilities supported by the
- // underlying link-layer endpoint.
- Capabilities() LinkEndpointCapabilities
-
// MaxHeaderLength returns the maximum size the network (and lower
// level layers combined) headers can have. Higher levels use this
// information to reserve space in the front of the packets they're
@@ -280,9 +538,6 @@ type NetworkEndpoint interface {
// header to the given destination address. It takes ownership of pkt.
WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) *tcpip.Error
- // NICID returns the id of the NIC this endpoint belongs to.
- NICID() tcpip.NICID
-
// HandlePacket is called by the link layer when new packets arrive to
// this network endpoint. It sets pkt.NetworkHeader.
//
@@ -297,6 +552,17 @@ type NetworkEndpoint interface {
NetworkProtocolNumber() tcpip.NetworkProtocolNumber
}
+// ForwardingNetworkProtocol is a NetworkProtocol that may forward packets.
+type ForwardingNetworkProtocol interface {
+ NetworkProtocol
+
+ // Forwarding returns the forwarding configuration.
+ Forwarding() bool
+
+ // SetForwarding sets the forwarding configuration.
+ SetForwarding(bool)
+}
+
// NetworkProtocol is the interface that needs to be implemented by network
// protocols (e.g., ipv4, ipv6) that want to be part of the networking stack.
type NetworkProtocol interface {
@@ -316,7 +582,7 @@ type NetworkProtocol interface {
ParseAddresses(v buffer.View) (src, dst tcpip.Address)
// NewEndpoint creates a new endpoint of this protocol.
- NewEndpoint(nicID tcpip.NICID, linkAddrCache LinkAddressCache, nud NUDHandler, dispatcher TransportDispatcher, sender LinkEndpoint, st *Stack) NetworkEndpoint
+ NewEndpoint(nic NetworkInterface, linkAddrCache LinkAddressCache, nud NUDHandler, dispatcher TransportDispatcher) NetworkEndpoint
// SetOption allows enabling/disabling protocol specific features.
// SetOption returns an error if the option is not supported or the
@@ -342,19 +608,6 @@ type NetworkProtocol interface {
// does not encapsulate anything).
// - Whether pkt.Data was large enough to parse and set pkt.NetworkHeader.
Parse(pkt *PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool)
-
- // ReturnError attempts to send a suitable error message to the sender
- // of a received packet.
- // - pkt holds the problematic packet.
- // - reason indicates what the reason for wanting a message is.
- // - route is the routing information for the received packet
- // ReturnError returns an error if the send failed and nil on success.
- // Note that deciding to deliberately send no message is a success.
- //
- // TODO(gvisor.dev/issues/3871): This method should be removed or simplified
- // after all (or all but one) of the ICMP error dispatch occurs through the
- // protocol specific modules. May become SendPortNotFound(r, pkt).
- ReturnError(r *Route, reason tcpip.ICMPReason, pkt *PacketBuffer) *tcpip.Error
}
// NetworkDispatcher contains the methods used by the network stack to deliver
@@ -458,8 +711,8 @@ type LinkEndpoint interface {
// Attach attaches the data link layer endpoint to the network-layer
// dispatcher of the stack.
//
- // Attach will be called with a nil dispatcher if the receiver's associated
- // NIC is being removed.
+ // Attach is called with a nil dispatcher when the endpoint's NIC is being
+ // removed.
Attach(dispatcher NetworkDispatcher)
// IsAttached returns whether a NetworkDispatcher is attached to the
diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go
index 2cbbf0de8..5ade3c832 100644
--- a/pkg/tcpip/stack/route.go
+++ b/pkg/tcpip/stack/route.go
@@ -42,17 +42,27 @@ type Route struct {
// NetProto is the network-layer protocol.
NetProto tcpip.NetworkProtocolNumber
- // ref a reference to the network endpoint through which the route
- // starts.
- ref *referencedNetworkEndpoint
-
// Loop controls where WritePacket should send packets.
Loop PacketLooping
+
+ // nic is the NIC the route goes through.
+ nic *NIC
+
+ // addressEndpoint is the local address this route is associated with.
+ addressEndpoint AssignableAddressEndpoint
+
+ // linkCache is set if link address resolution is enabled for this protocol on
+ // the route's NIC.
+ linkCache LinkAddressCache
+
+ // linkRes is set if link address resolution is enabled for this protocol on
+ // the route's NIC.
+ linkRes LinkAddressResolver
}
// makeRoute initializes a new route. It takes ownership of the provided
-// reference to a network endpoint.
-func makeRoute(netProto tcpip.NetworkProtocolNumber, localAddr, remoteAddr tcpip.Address, localLinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, handleLocal, multicastLoop bool) Route {
+// AssignableAddressEndpoint.
+func makeRoute(netProto tcpip.NetworkProtocolNumber, localAddr, remoteAddr tcpip.Address, nic *NIC, addressEndpoint AssignableAddressEndpoint, handleLocal, multicastLoop bool) Route {
loop := PacketOut
if handleLocal && localAddr != "" && remoteAddr == localAddr {
loop = PacketLoop
@@ -62,29 +72,40 @@ func makeRoute(netProto tcpip.NetworkProtocolNumber, localAddr, remoteAddr tcpip
loop |= PacketLoop
}
- return Route{
+ linkEP := nic.LinkEndpoint()
+ r := Route{
NetProto: netProto,
LocalAddress: localAddr,
- LocalLinkAddress: localLinkAddr,
+ LocalLinkAddress: linkEP.LinkAddress(),
RemoteAddress: remoteAddr,
- ref: ref,
+ addressEndpoint: addressEndpoint,
+ nic: nic,
Loop: loop,
}
+
+ if nic := r.nic; linkEP.Capabilities()&CapabilityResolutionRequired != 0 {
+ if linkRes, ok := nic.stack.linkAddrResolvers[r.NetProto]; ok {
+ r.linkRes = linkRes
+ r.linkCache = nic.stack
+ }
+ }
+
+ return r
}
// NICID returns the id of the NIC from which this route originates.
func (r *Route) NICID() tcpip.NICID {
- return r.ref.ep.NICID()
+ return r.nic.ID()
}
// MaxHeaderLength forwards the call to the network endpoint's implementation.
func (r *Route) MaxHeaderLength() uint16 {
- return r.ref.ep.MaxHeaderLength()
+ return r.addressEndpoint.NetworkEndpoint().MaxHeaderLength()
}
// Stats returns a mutable copy of current stats.
func (r *Route) Stats() tcpip.Stats {
- return r.ref.nic.stack.Stats()
+ return r.nic.stack.Stats()
}
// PseudoHeaderChecksum forwards the call to the network endpoint's
@@ -95,12 +116,12 @@ func (r *Route) PseudoHeaderChecksum(protocol tcpip.TransportProtocolNumber, tot
// Capabilities returns the link-layer capabilities of the route.
func (r *Route) Capabilities() LinkEndpointCapabilities {
- return r.ref.ep.Capabilities()
+ return r.nic.LinkEndpoint().Capabilities()
}
// GSOMaxSize returns the maximum GSO packet size.
func (r *Route) GSOMaxSize() uint32 {
- if gso, ok := r.ref.ep.(GSOEndpoint); ok {
+ if gso, ok := r.addressEndpoint.NetworkEndpoint().(GSOEndpoint); ok {
return gso.GSOMaxSize()
}
return 0
@@ -138,8 +159,8 @@ func (r *Route) Resolve(waker *sleep.Waker) (<-chan struct{}, *tcpip.Error) {
nextAddr = r.RemoteAddress
}
- if r.ref.nic.neigh != nil {
- entry, ch, err := r.ref.nic.neigh.entry(nextAddr, r.LocalAddress, r.ref.linkRes, waker)
+ if neigh := r.nic.neigh; neigh != nil {
+ entry, ch, err := neigh.entry(nextAddr, r.LocalAddress, r.linkRes, waker)
if err != nil {
return ch, err
}
@@ -147,7 +168,7 @@ func (r *Route) Resolve(waker *sleep.Waker) (<-chan struct{}, *tcpip.Error) {
return nil, nil
}
- linkAddr, ch, err := r.ref.linkCache.GetLinkAddress(r.ref.nic.ID(), nextAddr, r.LocalAddress, r.NetProto, waker)
+ linkAddr, ch, err := r.linkCache.GetLinkAddress(r.nic.ID(), nextAddr, r.LocalAddress, r.NetProto, waker)
if err != nil {
return ch, err
}
@@ -162,12 +183,12 @@ func (r *Route) RemoveWaker(waker *sleep.Waker) {
nextAddr = r.RemoteAddress
}
- if r.ref.nic.neigh != nil {
- r.ref.nic.neigh.removeWaker(nextAddr, waker)
+ if neigh := r.nic.neigh; neigh != nil {
+ neigh.removeWaker(nextAddr, waker)
return
}
- r.ref.linkCache.RemoveWaker(r.ref.nic.ID(), nextAddr, waker)
+ r.linkCache.RemoveWaker(r.nic.ID(), nextAddr, waker)
}
// IsResolutionRequired returns true if Resolve() must be called to resolve
@@ -175,27 +196,27 @@ func (r *Route) RemoveWaker(waker *sleep.Waker) {
//
// The NIC r uses must not be locked.
func (r *Route) IsResolutionRequired() bool {
- if r.ref.nic.neigh != nil {
- return r.ref.isValidForOutgoing() && r.ref.linkRes != nil && r.RemoteLinkAddress == ""
+ if r.nic.neigh != nil {
+ return r.nic.isValidForOutgoing(r.addressEndpoint) && r.linkRes != nil && r.RemoteLinkAddress == ""
}
- return r.ref.isValidForOutgoing() && r.ref.linkCache != nil && r.RemoteLinkAddress == ""
+ return r.nic.isValidForOutgoing(r.addressEndpoint) && r.linkCache != nil && r.RemoteLinkAddress == ""
}
// WritePacket writes the packet through the given route.
func (r *Route) WritePacket(gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) *tcpip.Error {
- if !r.ref.isValidForOutgoing() {
+ if !r.nic.isValidForOutgoing(r.addressEndpoint) {
return tcpip.ErrInvalidEndpointState
}
// WritePacket takes ownership of pkt, calculate numBytes first.
numBytes := pkt.Size()
- err := r.ref.ep.WritePacket(r, gso, params, pkt)
+ err := r.addressEndpoint.NetworkEndpoint().WritePacket(r, gso, params, pkt)
if err != nil {
r.Stats().IP.OutgoingPacketErrors.Increment()
} else {
- r.ref.nic.stats.Tx.Packets.Increment()
- r.ref.nic.stats.Tx.Bytes.IncrementBy(uint64(numBytes))
+ r.nic.stats.Tx.Packets.Increment()
+ r.nic.stats.Tx.Bytes.IncrementBy(uint64(numBytes))
}
return err
}
@@ -203,76 +224,75 @@ func (r *Route) WritePacket(gso *GSO, params NetworkHeaderParams, pkt *PacketBuf
// WritePackets writes a list of n packets through the given route and returns
// the number of packets written.
func (r *Route) WritePackets(gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, *tcpip.Error) {
- if !r.ref.isValidForOutgoing() {
+ if !r.nic.isValidForOutgoing(r.addressEndpoint) {
return 0, tcpip.ErrInvalidEndpointState
}
// WritePackets takes ownership of pkt, calculate length first.
numPkts := pkts.Len()
- n, err := r.ref.ep.WritePackets(r, gso, pkts, params)
+ n, err := r.addressEndpoint.NetworkEndpoint().WritePackets(r, gso, pkts, params)
if err != nil {
r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(numPkts - n))
}
- r.ref.nic.stats.Tx.Packets.IncrementBy(uint64(n))
+ r.nic.stats.Tx.Packets.IncrementBy(uint64(n))
writtenBytes := 0
for i, pb := 0, pkts.Front(); i < n && pb != nil; i, pb = i+1, pb.Next() {
writtenBytes += pb.Size()
}
- r.ref.nic.stats.Tx.Bytes.IncrementBy(uint64(writtenBytes))
+ r.nic.stats.Tx.Bytes.IncrementBy(uint64(writtenBytes))
return n, err
}
// WriteHeaderIncludedPacket writes a packet already containing a network
// header through the given route.
func (r *Route) WriteHeaderIncludedPacket(pkt *PacketBuffer) *tcpip.Error {
- if !r.ref.isValidForOutgoing() {
+ if !r.nic.isValidForOutgoing(r.addressEndpoint) {
return tcpip.ErrInvalidEndpointState
}
// WriteHeaderIncludedPacket takes ownership of pkt, calculate numBytes first.
numBytes := pkt.Data.Size()
- if err := r.ref.ep.WriteHeaderIncludedPacket(r, pkt); err != nil {
+ if err := r.addressEndpoint.NetworkEndpoint().WriteHeaderIncludedPacket(r, pkt); err != nil {
r.Stats().IP.OutgoingPacketErrors.Increment()
return err
}
- r.ref.nic.stats.Tx.Packets.Increment()
- r.ref.nic.stats.Tx.Bytes.IncrementBy(uint64(numBytes))
+ r.nic.stats.Tx.Packets.Increment()
+ r.nic.stats.Tx.Bytes.IncrementBy(uint64(numBytes))
return nil
}
// DefaultTTL returns the default TTL of the underlying network endpoint.
func (r *Route) DefaultTTL() uint8 {
- return r.ref.ep.DefaultTTL()
+ return r.addressEndpoint.NetworkEndpoint().DefaultTTL()
}
// MTU returns the MTU of the underlying network endpoint.
func (r *Route) MTU() uint32 {
- return r.ref.ep.MTU()
+ return r.addressEndpoint.NetworkEndpoint().MTU()
}
// NetworkProtocolNumber returns the NetworkProtocolNumber of the underlying
// network endpoint.
func (r *Route) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
- return r.ref.ep.NetworkProtocolNumber()
+ return r.addressEndpoint.NetworkEndpoint().NetworkProtocolNumber()
}
// Release frees all resources associated with the route.
func (r *Route) Release() {
- if r.ref != nil {
- r.ref.decRef()
- r.ref = nil
+ if r.addressEndpoint != nil {
+ r.addressEndpoint.DecRef()
+ r.addressEndpoint = nil
}
}
-// Clone Clone a route such that the original one can be released and the new
-// one will remain valid.
+// Clone clones the route.
func (r *Route) Clone() Route {
- if r.ref != nil {
- r.ref.incRef()
+ if r.addressEndpoint != nil {
+ _ = r.addressEndpoint.IncRef()
}
return *r
}
@@ -296,7 +316,7 @@ func (r *Route) MakeLoopedRoute() Route {
// Stack returns the instance of the Stack that owns this route.
func (r *Route) Stack() *Stack {
- return r.ref.stack()
+ return r.nic.stack
}
func (r *Route) isV4Broadcast(addr tcpip.Address) bool {
@@ -304,7 +324,7 @@ func (r *Route) isV4Broadcast(addr tcpip.Address) bool {
return true
}
- subnet := r.ref.addrWithPrefix().Subnet()
+ subnet := r.addressEndpoint.AddressWithPrefix().Subnet()
return subnet.IsBroadcast(addr)
}
@@ -330,7 +350,10 @@ func (r *Route) ReverseRoute(src tcpip.Address, dst tcpip.Address) Route {
LocalLinkAddress: r.RemoteLinkAddress,
RemoteAddress: src,
RemoteLinkAddress: r.LocalLinkAddress,
- ref: r.ref,
Loop: r.Loop,
+ addressEndpoint: r.addressEndpoint,
+ nic: r.nic,
+ linkCache: r.linkCache,
+ linkRes: r.linkRes,
}
}
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 68cf77de2..57d8e79e0 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -144,10 +144,7 @@ type TCPReceiverState struct {
// PendingBufUsed is the number of bytes pending in the receive
// queue.
- PendingBufUsed seqnum.Size
-
- // PendingBufSize is the size of the socket receive buffer.
- PendingBufSize seqnum.Size
+ PendingBufUsed int
}
// TCPSenderState holds a copy of the internal state of the sender for
@@ -366,38 +363,6 @@ func (u *uniqueIDGenerator) UniqueID() uint64 {
return atomic.AddUint64((*uint64)(u), 1)
}
-// NICNameFromID is a function that returns a stable name for the specified NIC,
-// even if different NIC IDs are used to refer to the same NIC in different
-// program runs. It is used when generating opaque interface identifiers (IIDs).
-// If the NIC was created with a name, it will be passed to NICNameFromID.
-//
-// NICNameFromID SHOULD return unique NIC names so unique opaque IIDs are
-// generated for the same prefix on differnt NICs.
-type NICNameFromID func(tcpip.NICID, string) string
-
-// OpaqueInterfaceIdentifierOptions holds the options related to the generation
-// of opaque interface indentifiers (IIDs) as defined by RFC 7217.
-type OpaqueInterfaceIdentifierOptions struct {
- // NICNameFromID is a function that returns a stable name for a specified NIC,
- // even if the NIC ID changes over time.
- //
- // Must be specified to generate the opaque IID.
- NICNameFromID NICNameFromID
-
- // SecretKey is a pseudo-random number used as the secret key when generating
- // opaque IIDs as defined by RFC 7217. The key SHOULD be at least
- // header.OpaqueIIDSecretKeyMinBytes bytes and MUST follow minimum randomness
- // requirements for security as outlined by RFC 4086. SecretKey MUST NOT
- // change between program runs, unless explicitly changed.
- //
- // OpaqueInterfaceIdentifierOptions takes ownership of SecretKey. SecretKey
- // MUST NOT be modified after Stack is created.
- //
- // May be nil, but a nil value is highly discouraged to maintain
- // some level of randomness between nodes.
- SecretKey []byte
-}
-
// Stack is a networking stack, with all supported protocols, NICs, and route
// table.
type Stack struct {
@@ -405,13 +370,6 @@ type Stack struct {
networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol
linkAddrResolvers map[tcpip.NetworkProtocolNumber]LinkAddressResolver
- // forwarding contains the whether packet forwarding is enabled or not for
- // different network protocols.
- forwarding struct {
- sync.RWMutex
- protocols map[tcpip.NetworkProtocolNumber]bool
- }
-
// rawFactory creates raw endpoints. If nil, raw endpoints are
// disabled. It is set during Stack creation and is immutable.
rawFactory RawFactory
@@ -464,9 +422,6 @@ type Stack struct {
// TODO(gvisor.dev/issue/940): S/R this field.
seed uint32
- // ndpConfigs is the default NDP configurations used by interfaces.
- ndpConfigs NDPConfigurations
-
// nudConfigs is the default NUD configurations used by interfaces.
nudConfigs NUDConfigurations
@@ -474,15 +429,6 @@ type Stack struct {
// by the NIC's neighborCache instead of linkAddrCache.
useNeighborCache bool
- // autoGenIPv6LinkLocal determines whether or not the stack will attempt
- // to auto-generate an IPv6 link-local address for newly enabled non-loopback
- // NICs. See the AutoGenIPv6LinkLocal field of Options for more details.
- autoGenIPv6LinkLocal bool
-
- // ndpDisp is the NDP event dispatcher that is used to send the netstack
- // integrator NDP related events.
- ndpDisp NDPDispatcher
-
// nudDisp is the NUD event dispatcher that is used to send the netstack
// integrator NUD related events.
nudDisp NUDDispatcher
@@ -490,14 +436,6 @@ type Stack struct {
// uniqueIDGenerator is a generator of unique identifiers.
uniqueIDGenerator UniqueID
- // opaqueIIDOpts hold the options for generating opaque interface identifiers
- // (IIDs) as outlined by RFC 7217.
- opaqueIIDOpts OpaqueInterfaceIdentifierOptions
-
- // tempIIDSeed is used to seed the initial temporary interface identifier
- // history value used to generate IIDs for temporary SLAAC addresses.
- tempIIDSeed []byte
-
// forwarder holds the packets that wait for their link-address resolutions
// to complete, and forwards them when each resolution is done.
forwarder *forwardQueue
@@ -520,13 +458,25 @@ type UniqueID interface {
UniqueID() uint64
}
+// NetworkProtocolFactory instantiates a network protocol.
+//
+// NetworkProtocolFactory must not attempt to modify the stack, it may only
+// query the stack.
+type NetworkProtocolFactory func(*Stack) NetworkProtocol
+
+// TransportProtocolFactory instantiates a transport protocol.
+//
+// TransportProtocolFactory must not attempt to modify the stack, it may only
+// query the stack.
+type TransportProtocolFactory func(*Stack) TransportProtocol
+
// Options contains optional Stack configuration.
type Options struct {
// NetworkProtocols lists the network protocols to enable.
- NetworkProtocols []NetworkProtocol
+ NetworkProtocols []NetworkProtocolFactory
// TransportProtocols lists the transport protocols to enable.
- TransportProtocols []TransportProtocol
+ TransportProtocols []TransportProtocolFactory
// Clock is an optional clock source used for timestampping packets.
//
@@ -544,13 +494,6 @@ type Options struct {
// UniqueID is an optional generator of unique identifiers.
UniqueID UniqueID
- // NDPConfigs is the default NDP configurations used by interfaces.
- //
- // By default, NDPConfigs will have a zero value for its
- // DupAddrDetectTransmits field, implying that DAD will not be performed
- // before assigning an address to a NIC.
- NDPConfigs NDPConfigurations
-
// NUDConfigs is the default NUD configurations used by interfaces.
NUDConfigs NUDConfigurations
@@ -561,24 +504,6 @@ type Options struct {
// and ClearNeighbors.
UseNeighborCache bool
- // AutoGenIPv6LinkLocal determines whether or not the stack will attempt to
- // auto-generate an IPv6 link-local address for newly enabled non-loopback
- // NICs.
- //
- // Note, setting this to true does not mean that a link-local address
- // will be assigned right away, or at all. If Duplicate Address Detection
- // is enabled, an address will only be assigned if it successfully resolves.
- // If it fails, no further attempt will be made to auto-generate an IPv6
- // link-local address.
- //
- // The generated link-local address will follow RFC 4291 Appendix A
- // guidelines.
- AutoGenIPv6LinkLocal bool
-
- // NDPDisp is the NDP event dispatcher that an integrator can provide to
- // receive NDP related events.
- NDPDisp NDPDispatcher
-
// NUDDisp is the NUD event dispatcher that an integrator can provide to
// receive NUD related events.
NUDDisp NUDDispatcher
@@ -587,31 +512,12 @@ type Options struct {
// this is non-nil.
RawFactory RawFactory
- // OpaqueIIDOpts hold the options for generating opaque interface
- // identifiers (IIDs) as outlined by RFC 7217.
- OpaqueIIDOpts OpaqueInterfaceIdentifierOptions
-
// RandSource is an optional source to use to generate random
// numbers. If omitted it defaults to a Source seeded by the data
// returned by rand.Read().
//
// RandSource must be thread-safe.
RandSource mathrand.Source
-
- // TempIIDSeed is used to seed the initial temporary interface identifier
- // history value used to generate IIDs for temporary SLAAC addresses.
- //
- // Temporary SLAAC adresses are short-lived addresses which are unpredictable
- // and random from the perspective of other nodes on the network. It is
- // recommended that the seed be a random byte buffer of at least
- // header.IIDSize bytes to make sure that temporary SLAAC addresses are
- // sufficiently random. It should follow minimum randomness requirements for
- // security as outlined by RFC 4086.
- //
- // Note: using a nil value, the same seed across netstack program runs, or a
- // seed that is too small would reduce randomness and increase predictability,
- // defeating the purpose of temporary SLAAC addresses.
- TempIIDSeed []byte
}
// TransportEndpointInfo holds useful information about a transport endpoint
@@ -714,36 +620,28 @@ func New(opts Options) *Stack {
randSrc = &lockedRandomSource{src: mathrand.NewSource(generateRandInt64())}
}
- // Make sure opts.NDPConfigs contains valid values only.
- opts.NDPConfigs.validate()
-
opts.NUDConfigs.resetInvalidFields()
s := &Stack{
- transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
- networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
- linkAddrResolvers: make(map[tcpip.NetworkProtocolNumber]LinkAddressResolver),
- nics: make(map[tcpip.NICID]*NIC),
- cleanupEndpoints: make(map[TransportEndpoint]struct{}),
- linkAddrCache: newLinkAddrCache(ageLimit, resolutionTimeout, resolutionAttempts),
- PortManager: ports.NewPortManager(),
- clock: clock,
- stats: opts.Stats.FillIn(),
- handleLocal: opts.HandleLocal,
- tables: DefaultTables(),
- icmpRateLimiter: NewICMPRateLimiter(),
- seed: generateRandUint32(),
- ndpConfigs: opts.NDPConfigs,
- nudConfigs: opts.NUDConfigs,
- useNeighborCache: opts.UseNeighborCache,
- autoGenIPv6LinkLocal: opts.AutoGenIPv6LinkLocal,
- uniqueIDGenerator: opts.UniqueID,
- ndpDisp: opts.NDPDisp,
- nudDisp: opts.NUDDisp,
- opaqueIIDOpts: opts.OpaqueIIDOpts,
- tempIIDSeed: opts.TempIIDSeed,
- forwarder: newForwardQueue(),
- randomGenerator: mathrand.New(randSrc),
+ transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
+ networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
+ linkAddrResolvers: make(map[tcpip.NetworkProtocolNumber]LinkAddressResolver),
+ nics: make(map[tcpip.NICID]*NIC),
+ cleanupEndpoints: make(map[TransportEndpoint]struct{}),
+ linkAddrCache: newLinkAddrCache(ageLimit, resolutionTimeout, resolutionAttempts),
+ PortManager: ports.NewPortManager(),
+ clock: clock,
+ stats: opts.Stats.FillIn(),
+ handleLocal: opts.HandleLocal,
+ tables: DefaultTables(),
+ icmpRateLimiter: NewICMPRateLimiter(),
+ seed: generateRandUint32(),
+ nudConfigs: opts.NUDConfigs,
+ useNeighborCache: opts.UseNeighborCache,
+ uniqueIDGenerator: opts.UniqueID,
+ nudDisp: opts.NUDDisp,
+ forwarder: newForwardQueue(),
+ randomGenerator: mathrand.New(randSrc),
sendBufferSize: SendBufferSizeOption{
Min: MinBufferSize,
Default: DefaultBufferSize,
@@ -755,10 +653,10 @@ func New(opts Options) *Stack {
Max: DefaultMaxBufferSize,
},
}
- s.forwarding.protocols = make(map[tcpip.NetworkProtocolNumber]bool)
// Add specified network protocols.
- for _, netProto := range opts.NetworkProtocols {
+ for _, netProtoFactory := range opts.NetworkProtocols {
+ netProto := netProtoFactory(s)
s.networkProtocols[netProto.Number()] = netProto
if r, ok := netProto.(LinkAddressResolver); ok {
s.linkAddrResolvers[r.LinkAddressProtocol()] = r
@@ -766,7 +664,8 @@ func New(opts Options) *Stack {
}
// Add specified transport protocols.
- for _, transProto := range opts.TransportProtocols {
+ for _, transProtoFactory := range opts.TransportProtocols {
+ transProto := transProtoFactory(s)
s.transportProtocols[transProto.Number()] = &transportProtocolState{
proto: transProto,
}
@@ -873,42 +772,37 @@ func (s *Stack) Stats() tcpip.Stats {
return s.stats
}
-// SetForwarding enables or disables packet forwarding between NICs.
-func (s *Stack) SetForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) {
- s.forwarding.Lock()
- defer s.forwarding.Unlock()
-
- // If this stack does not support the protocol, do nothing.
- if _, ok := s.networkProtocols[protocol]; !ok {
- return
+// SetForwarding enables or disables packet forwarding between NICs for the
+// passed protocol.
+func (s *Stack) SetForwarding(protocolNum tcpip.NetworkProtocolNumber, enable bool) *tcpip.Error {
+ protocol, ok := s.networkProtocols[protocolNum]
+ if !ok {
+ return tcpip.ErrUnknownProtocol
}
- // If the forwarding value for this protocol hasn't changed then do
- // nothing.
- if forwarding := s.forwarding.protocols[protocol]; forwarding == enable {
- return
+ forwardingProtocol, ok := protocol.(ForwardingNetworkProtocol)
+ if !ok {
+ return tcpip.ErrNotSupported
}
- s.forwarding.protocols[protocol] = enable
+ forwardingProtocol.SetForwarding(enable)
+ return nil
+}
- if protocol == header.IPv6ProtocolNumber {
- if enable {
- for _, nic := range s.nics {
- nic.becomeIPv6Router()
- }
- } else {
- for _, nic := range s.nics {
- nic.becomeIPv6Host()
- }
- }
+// Forwarding returns true if packet forwarding between NICs is enabled for the
+// passed protocol.
+func (s *Stack) Forwarding(protocolNum tcpip.NetworkProtocolNumber) bool {
+ protocol, ok := s.networkProtocols[protocolNum]
+ if !ok {
+ return false
+ }
+
+ forwardingProtocol, ok := protocol.(ForwardingNetworkProtocol)
+ if !ok {
+ return false
}
-}
-// Forwarding returns if packet forwarding between NICs is enabled.
-func (s *Stack) Forwarding(protocol tcpip.NetworkProtocolNumber) bool {
- s.forwarding.RLock()
- defer s.forwarding.RUnlock()
- return s.forwarding.protocols[protocol]
+ return forwardingProtocol.Forwarding()
}
// SetRouteTable assigns the route table to be used by this stack. It
@@ -943,7 +837,7 @@ func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcp
return nil, tcpip.ErrUnknownProtocol
}
- return t.proto.NewEndpoint(s, network, waiterQueue)
+ return t.proto.NewEndpoint(network, waiterQueue)
}
// NewRawEndpoint creates a new raw transport layer endpoint of the given
@@ -963,7 +857,7 @@ func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network
return nil, tcpip.ErrUnknownProtocol
}
- return t.proto.NewRawEndpoint(s, network, waiterQueue)
+ return t.proto.NewRawEndpoint(network, waiterQueue)
}
// NewPacketEndpoint creates a new packet endpoint listening for the given
@@ -1070,7 +964,8 @@ func (s *Stack) DisableNIC(id tcpip.NICID) *tcpip.Error {
return tcpip.ErrUnknownNICID
}
- return nic.disable()
+ nic.disable()
+ return nil
}
// CheckNIC checks if a NIC is usable.
@@ -1083,7 +978,7 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool {
return false
}
- return nic.enabled()
+ return nic.Enabled()
}
// RemoveNIC removes NIC and all related routes from the network stack.
@@ -1161,14 +1056,14 @@ func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo {
for id, nic := range s.nics {
flags := NICStateFlags{
Up: true, // Netstack interfaces are always up.
- Running: nic.enabled(),
+ Running: nic.Enabled(),
Promiscuous: nic.isPromiscuousMode(),
- Loopback: nic.isLoopback(),
+ Loopback: nic.IsLoopback(),
}
nics[id] = NICInfo{
Name: nic.name,
LinkAddress: nic.linkEP.LinkAddress(),
- ProtocolAddresses: nic.PrimaryAddresses(),
+ ProtocolAddresses: nic.primaryAddresses(),
Flags: flags,
MTU: nic.linkEP.MTU(),
Stats: nic.stats,
@@ -1232,7 +1127,7 @@ func (s *Stack) AddProtocolAddressWithOptions(id tcpip.NICID, protocolAddress tc
return tcpip.ErrUnknownNICID
}
- return nic.AddAddress(protocolAddress, peb)
+ return nic.addAddress(protocolAddress, peb)
}
// RemoveAddress removes an existing network-layer address from the specified
@@ -1242,7 +1137,7 @@ func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) *tcpip.Error {
defer s.mu.RUnlock()
if nic, ok := s.nics[id]; ok {
- return nic.RemoveAddress(addr)
+ return nic.removeAddress(addr)
}
return tcpip.ErrUnknownNICID
@@ -1256,7 +1151,7 @@ func (s *Stack) AllAddresses() map[tcpip.NICID][]tcpip.ProtocolAddress {
nics := make(map[tcpip.NICID][]tcpip.ProtocolAddress)
for id, nic := range s.nics {
- nics[id] = nic.AllAddresses()
+ nics[id] = nic.allPermanentAddresses()
}
return nics
}
@@ -1278,7 +1173,7 @@ func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocol
return nic.primaryAddress(protocol), nil
}
-func (s *Stack) getRefEP(nic *NIC, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) (ref *referencedNetworkEndpoint) {
+func (s *Stack) getAddressEP(nic *NIC, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) AssignableAddressEndpoint {
if len(localAddr) == 0 {
return nic.primaryEndpoint(netProto, remoteAddr)
}
@@ -1295,9 +1190,9 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n
isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
needRoute := !(isLocalBroadcast || isMulticast || header.IsV6LinkLocalAddress(remoteAddr))
if id != 0 && !needRoute {
- if nic, ok := s.nics[id]; ok && nic.enabled() {
- if ref := s.getRefEP(nic, localAddr, remoteAddr, netProto); ref != nil {
- return makeRoute(netProto, ref.address(), remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.isLoopback(), multicastLoop && !nic.isLoopback()), nil
+ if nic, ok := s.nics[id]; ok && nic.Enabled() {
+ if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
+ return makeRoute(netProto, addressEndpoint.AddressWithPrefix().Address, remoteAddr, nic, addressEndpoint, s.handleLocal && !nic.IsLoopback(), multicastLoop && !nic.IsLoopback()), nil
}
}
} else {
@@ -1305,20 +1200,20 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n
if (id != 0 && id != route.NIC) || (len(remoteAddr) != 0 && !route.Destination.Contains(remoteAddr)) {
continue
}
- if nic, ok := s.nics[route.NIC]; ok && nic.enabled() {
- if ref := s.getRefEP(nic, localAddr, remoteAddr, netProto); ref != nil {
+ if nic, ok := s.nics[route.NIC]; ok && nic.Enabled() {
+ if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
if len(remoteAddr) == 0 {
// If no remote address was provided, then the route
// provided will refer to the link local address.
- remoteAddr = ref.address()
+ remoteAddr = addressEndpoint.AddressWithPrefix().Address
}
- r := makeRoute(netProto, ref.address(), remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.isLoopback(), multicastLoop && !nic.isLoopback())
+ r := makeRoute(netProto, addressEndpoint.AddressWithPrefix().Address, remoteAddr, nic, addressEndpoint, s.handleLocal && !nic.IsLoopback(), multicastLoop && !nic.IsLoopback())
if len(route.Gateway) > 0 {
if needRoute {
r.NextHop = route.Gateway
}
- } else if subnet := ref.addrWithPrefix().Subnet(); subnet.IsBroadcast(remoteAddr) {
+ } else if subnet := addressEndpoint.AddressWithPrefix().Subnet(); subnet.IsBroadcast(remoteAddr) {
r.RemoteLinkAddress = header.EthernetBroadcastAddress
}
@@ -1356,21 +1251,20 @@ func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProto
return 0
}
- ref := nic.findEndpoint(protocol, addr, CanBePrimaryEndpoint)
- if ref == nil {
+ addressEndpoint := nic.findEndpoint(protocol, addr, CanBePrimaryEndpoint)
+ if addressEndpoint == nil {
return 0
}
- ref.decRef()
+ addressEndpoint.DecRef()
return nic.id
}
// Go through all the NICs.
for _, nic := range s.nics {
- ref := nic.findEndpoint(protocol, addr, CanBePrimaryEndpoint)
- if ref != nil {
- ref.decRef()
+ if addressEndpoint := nic.findEndpoint(protocol, addr, CanBePrimaryEndpoint); addressEndpoint != nil {
+ addressEndpoint.DecRef()
return nic.id
}
}
@@ -1839,7 +1733,7 @@ func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NIC
defer s.mu.RUnlock()
if nic, ok := s.nics[nicID]; ok {
- return nic.leaveGroup(multicastAddr)
+ return nic.leaveGroup(protocol, multicastAddr)
}
return tcpip.ErrUnknownNICID
}
@@ -1891,53 +1785,18 @@ func (s *Stack) AllowICMPMessage() bool {
return s.icmpRateLimiter.Allow()
}
-// IsAddrTentative returns true if addr is tentative on the NIC with ID id.
-//
-// Note that if addr is not associated with a NIC with id ID, then this
-// function will return false. It will only return true if the address is
-// associated with the NIC AND it is tentative.
-func (s *Stack) IsAddrTentative(id tcpip.NICID, addr tcpip.Address) (bool, *tcpip.Error) {
- s.mu.RLock()
- defer s.mu.RUnlock()
-
- nic, ok := s.nics[id]
- if !ok {
- return false, tcpip.ErrUnknownNICID
- }
-
- return nic.isAddrTentative(addr), nil
-}
-
-// DupTentativeAddrDetected attempts to inform the NIC with ID id that a
-// tentative addr on it is a duplicate on a link.
-func (s *Stack) DupTentativeAddrDetected(id tcpip.NICID, addr tcpip.Address) *tcpip.Error {
- s.mu.Lock()
- defer s.mu.Unlock()
-
- nic, ok := s.nics[id]
- if !ok {
- return tcpip.ErrUnknownNICID
- }
-
- return nic.dupTentativeAddrDetected(addr)
-}
-
-// SetNDPConfigurations sets the per-interface NDP configurations on the NIC
-// with ID id to c.
-//
-// Note, if c contains invalid NDP configuration values, it will be fixed to
-// use default values for the erroneous values.
-func (s *Stack) SetNDPConfigurations(id tcpip.NICID, c NDPConfigurations) *tcpip.Error {
+// GetNetworkEndpoint returns the NetworkEndpoint with the specified protocol
+// number installed on the specified NIC.
+func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, *tcpip.Error) {
s.mu.Lock()
defer s.mu.Unlock()
- nic, ok := s.nics[id]
+ nic, ok := s.nics[nicID]
if !ok {
- return tcpip.ErrUnknownNICID
+ return nil, tcpip.ErrUnknownNICID
}
- nic.setNDPConfigs(c)
- return nil
+ return nic.networkEndpoints[proto], nil
}
// NUDConfigurations gets the per-interface NUD configurations.
@@ -1950,7 +1809,7 @@ func (s *Stack) NUDConfigurations(id tcpip.NICID) (NUDConfigurations, *tcpip.Err
return NUDConfigurations{}, tcpip.ErrUnknownNICID
}
- return nic.NUDConfigs()
+ return nic.nudConfigs()
}
// SetNUDConfigurations sets the per-interface NUD configurations.
@@ -1969,22 +1828,6 @@ func (s *Stack) SetNUDConfigurations(id tcpip.NICID, c NUDConfigurations) *tcpip
return nic.setNUDConfigs(c)
}
-// HandleNDPRA provides a NIC with ID id a validated NDP Router Advertisement
-// message that it needs to handle.
-func (s *Stack) HandleNDPRA(id tcpip.NICID, ip tcpip.Address, ra header.NDPRouterAdvert) *tcpip.Error {
- s.mu.Lock()
- defer s.mu.Unlock()
-
- nic, ok := s.nics[id]
- if !ok {
- return tcpip.ErrUnknownNICID
- }
-
- nic.handleNDPRA(ip, ra)
-
- return nil
-}
-
// Seed returns a 32 bit value that can be used as a seed value for port
// picking, ISN generation etc.
//
@@ -2026,16 +1869,14 @@ func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, addres
defer s.mu.RUnlock()
for _, nic := range s.nics {
- id := NetworkEndpointID{address}
-
- if ref, ok := nic.mu.endpoints[id]; ok {
- nic.mu.RLock()
- defer nic.mu.RUnlock()
-
- // An endpoint with this id exists, check if it can be
- // used and return it.
- return ref.ep, nil
+ addressEndpoint := nic.getAddressOrCreateTempInner(netProto, address, false /* createTemp */, NeverPrimaryEndpoint)
+ if addressEndpoint == nil {
+ continue
}
+
+ ep := addressEndpoint.NetworkEndpoint()
+ addressEndpoint.DecRef()
+ return ep, nil
}
return nil, tcpip.ErrBadAddress
}
@@ -2052,3 +1893,8 @@ func (s *Stack) FindNICNameFromID(id tcpip.NICID) string {
return nic.Name()
}
+
+// NewJob returns a new tcpip.Job using the stack's clock.
+func (s *Stack) NewJob(l sync.Locker, f func()) *tcpip.Job {
+ return tcpip.NewJob(s.clock, l, f)
+}
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 9ef6787c6..fda22c550 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -29,6 +29,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -68,18 +69,26 @@ const (
// use the first three: destination address, source address, and transport
// protocol. They're all one byte fields to simplify parsing.
type fakeNetworkEndpoint struct {
+ stack.AddressableEndpointState
+
nicID tcpip.NICID
proto *fakeNetworkProtocol
dispatcher stack.TransportDispatcher
ep stack.LinkEndpoint
}
-func (f *fakeNetworkEndpoint) MTU() uint32 {
- return f.ep.MTU() - uint32(f.MaxHeaderLength())
+func (*fakeNetworkEndpoint) Enable() *tcpip.Error {
+ return nil
}
-func (f *fakeNetworkEndpoint) NICID() tcpip.NICID {
- return f.nicID
+func (*fakeNetworkEndpoint) Enabled() bool {
+ return true
+}
+
+func (*fakeNetworkEndpoint) Disable() {}
+
+func (f *fakeNetworkEndpoint) MTU() uint32 {
+ return f.ep.MTU() - uint32(f.MaxHeaderLength())
}
func (*fakeNetworkEndpoint) DefaultTTL() uint8 {
@@ -118,10 +127,6 @@ func (f *fakeNetworkEndpoint) PseudoHeaderChecksum(protocol tcpip.TransportProto
return 0
}
-func (f *fakeNetworkEndpoint) Capabilities() stack.LinkEndpointCapabilities {
- return f.ep.Capabilities()
-}
-
func (f *fakeNetworkEndpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
return f.proto.Number()
}
@@ -156,7 +161,9 @@ func (*fakeNetworkEndpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack
return tcpip.ErrNotSupported
}
-func (*fakeNetworkEndpoint) Close() {}
+func (f *fakeNetworkEndpoint) Close() {
+ f.AddressableEndpointState.Cleanup()
+}
// fakeNetworkProtocol is a network-layer protocol descriptor. It aggregates the
// number of packets sent and received via endpoints of this protocol. The index
@@ -165,6 +172,11 @@ type fakeNetworkProtocol struct {
packetCount [10]int
sendPacketCount [10]int
defaultTTL uint8
+
+ mu struct {
+ sync.RWMutex
+ forwarding bool
+ }
}
func (f *fakeNetworkProtocol) Number() tcpip.NetworkProtocolNumber {
@@ -187,13 +199,15 @@ func (*fakeNetworkProtocol) ParseAddresses(v buffer.View) (src, dst tcpip.Addres
return tcpip.Address(v[srcAddrOffset : srcAddrOffset+1]), tcpip.Address(v[dstAddrOffset : dstAddrOffset+1])
}
-func (f *fakeNetworkProtocol) NewEndpoint(nicID tcpip.NICID, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher, ep stack.LinkEndpoint, _ *stack.Stack) stack.NetworkEndpoint {
- return &fakeNetworkEndpoint{
- nicID: nicID,
+func (f *fakeNetworkProtocol) NewEndpoint(nic stack.NetworkInterface, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
+ e := &fakeNetworkEndpoint{
+ nicID: nic.ID(),
proto: f,
dispatcher: dispatcher,
- ep: ep,
+ ep: nic.LinkEndpoint(),
}
+ e.AddressableEndpointState.Init(e)
+ return e
}
func (f *fakeNetworkProtocol) SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error {
@@ -216,11 +230,6 @@ func (f *fakeNetworkProtocol) Option(option tcpip.GettableNetworkProtocolOption)
}
}
-// ReturnError implements NetworkProtocol.ReturnError
-func (*fakeNetworkProtocol) ReturnError(*stack.Route, tcpip.ICMPReason, *stack.PacketBuffer) *tcpip.Error {
- return nil
-}
-
// Close implements NetworkProtocol.Close.
func (*fakeNetworkProtocol) Close() {}
@@ -236,7 +245,21 @@ func (*fakeNetworkProtocol) Parse(pkt *stack.PacketBuffer) (tcpip.TransportProto
return tcpip.TransportProtocolNumber(hdr[protocolNumberOffset]), true, true
}
-func fakeNetFactory() stack.NetworkProtocol {
+// Forwarding implements stack.ForwardingNetworkProtocol.
+func (f *fakeNetworkProtocol) Forwarding() bool {
+ f.mu.RLock()
+ defer f.mu.RUnlock()
+ return f.mu.forwarding
+}
+
+// SetForwarding implements stack.ForwardingNetworkProtocol.
+func (f *fakeNetworkProtocol) SetForwarding(v bool) {
+ f.mu.Lock()
+ defer f.mu.Unlock()
+ f.mu.forwarding = v
+}
+
+func fakeNetFactory(*stack.Stack) stack.NetworkProtocol {
return &fakeNetworkProtocol{}
}
@@ -273,7 +296,7 @@ func TestNetworkReceive(t *testing.T) {
// addresses attached to it: 1 & 2.
ep := channel.New(10, defaultMTU, "")
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
if err := s.CreateNIC(1, ep); err != nil {
t.Fatal("CreateNIC failed:", err)
@@ -433,7 +456,7 @@ func TestNetworkSend(t *testing.T) {
// existing nic.
ep := channel.New(10, defaultMTU, "")
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
if err := s.CreateNIC(1, ep); err != nil {
t.Fatal("NewNIC failed:", err)
@@ -460,7 +483,7 @@ func TestNetworkSendMultiRoute(t *testing.T) {
// addresses per nic, the first nic has odd address, the second one has
// even addresses.
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep1 := channel.New(10, defaultMTU, "")
@@ -560,7 +583,7 @@ func TestAttachToLinkEndpointImmediately(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
e := linkEPWithMockedAttach{
@@ -579,7 +602,7 @@ func TestAttachToLinkEndpointImmediately(t *testing.T) {
func TestDisableUnknownNIC(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
if err := s.DisableNIC(1); err != tcpip.ErrUnknownNICID {
@@ -591,7 +614,7 @@ func TestDisabledNICsNICInfoAndCheckNIC(t *testing.T) {
const nicID = 1
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
e := loopback.New()
@@ -638,7 +661,7 @@ func TestDisabledNICsNICInfoAndCheckNIC(t *testing.T) {
func TestRemoveUnknownNIC(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
if err := s.RemoveNIC(1); err != tcpip.ErrUnknownNICID {
@@ -650,7 +673,7 @@ func TestRemoveNIC(t *testing.T) {
const nicID = 1
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
e := linkEPWithMockedAttach{
@@ -711,7 +734,7 @@ func TestRouteWithDownNIC(t *testing.T) {
setup := func(t *testing.T) (*stack.Stack, *channel.Endpoint, *channel.Endpoint) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep1 := channel.New(1, defaultMTU, "")
@@ -877,7 +900,7 @@ func TestRoutes(t *testing.T) {
// addresses per nic, the first nic has odd address, the second one has
// even addresses.
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep1 := channel.New(10, defaultMTU, "")
@@ -957,7 +980,7 @@ func TestAddressRemoval(t *testing.T) {
remoteAddr := tcpip.Address("\x02")
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
@@ -1004,7 +1027,7 @@ func TestAddressRemovalWithRouteHeld(t *testing.T) {
remoteAddr := tcpip.Address("\x02")
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
@@ -1095,7 +1118,7 @@ func TestEndpointExpiration(t *testing.T) {
for _, spoofing := range []bool{true, false} {
t.Run(fmt.Sprintf("promiscuous=%t spoofing=%t", promiscuous, spoofing), func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
@@ -1253,7 +1276,7 @@ func TestEndpointExpiration(t *testing.T) {
func TestPromiscuousMode(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
@@ -1305,7 +1328,7 @@ func TestSpoofingWithAddress(t *testing.T) {
dstAddr := tcpip.Address("\x03")
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
@@ -1371,7 +1394,7 @@ func TestSpoofingNoAddress(t *testing.T) {
dstAddr := tcpip.Address("\x02")
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
@@ -1434,7 +1457,7 @@ func verifyRoute(gotRoute, wantRoute stack.Route) error {
func TestOutgoingBroadcastWithEmptyRouteTable(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
@@ -1477,7 +1500,7 @@ func TestOutgoingBroadcastWithRouteTable(t *testing.T) {
// Create a new stack with two NICs.
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
if err := s.CreateNIC(1, ep); err != nil {
@@ -1578,7 +1601,7 @@ func TestMulticastOrIPv6LinkLocalNeedsNoRoute(t *testing.T) {
} {
t.Run(tc.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
@@ -1635,8 +1658,8 @@ func TestMulticastOrIPv6LinkLocalNeedsNoRoute(t *testing.T) {
func TestNetworkOption(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
- TransportProtocols: []stack.TransportProtocol{},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
+ TransportProtocols: []stack.TransportProtocolFactory{},
})
opt := tcpip.DefaultTTLOption(5)
@@ -1662,7 +1685,7 @@ func TestGetMainNICAddressAddPrimaryNonPrimary(t *testing.T) {
for never := 0; never < 3; never++ {
t.Run(fmt.Sprintf("never=%d", never), func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
if err := s.CreateNIC(1, ep); err != nil {
@@ -1729,7 +1752,7 @@ func TestGetMainNICAddressAddPrimaryNonPrimary(t *testing.T) {
func TestGetMainNICAddressAddRemove(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
if err := s.CreateNIC(1, ep); err != nil {
@@ -1814,7 +1837,7 @@ func verifyAddresses(t *testing.T, expectedAddresses, gotAddresses []tcpip.Proto
func TestAddAddress(t *testing.T) {
const nicID = 1
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
if err := s.CreateNIC(nicID, ep); err != nil {
@@ -1841,7 +1864,7 @@ func TestAddAddress(t *testing.T) {
func TestAddProtocolAddress(t *testing.T) {
const nicID = 1
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
if err := s.CreateNIC(nicID, ep); err != nil {
@@ -1875,7 +1898,7 @@ func TestAddProtocolAddress(t *testing.T) {
func TestAddAddressWithOptions(t *testing.T) {
const nicID = 1
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
if err := s.CreateNIC(nicID, ep); err != nil {
@@ -1906,7 +1929,7 @@ func TestAddAddressWithOptions(t *testing.T) {
func TestAddProtocolAddressWithOptions(t *testing.T) {
const nicID = 1
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep := channel.New(10, defaultMTU, "")
if err := s.CreateNIC(nicID, ep); err != nil {
@@ -2027,7 +2050,7 @@ func TestCreateNICWithOptions(t *testing.T) {
func TestNICStats(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep1 := channel.New(10, defaultMTU, "")
if err := s.CreateNIC(1, ep1); err != nil {
@@ -2094,7 +2117,7 @@ func TestNICForwarding(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
s.SetForwarding(fakeNetNumber, true)
@@ -2218,7 +2241,7 @@ func TestNICAutoGenLinkLocalAddr(t *testing.T) {
nicName string
autoGen bool
linkAddr tcpip.LinkAddress
- iidOpts stack.OpaqueInterfaceIdentifierOptions
+ iidOpts ipv6.OpaqueInterfaceIdentifierOptions
shouldGen bool
expectedAddr tcpip.Address
}{
@@ -2234,7 +2257,7 @@ func TestNICAutoGenLinkLocalAddr(t *testing.T) {
nicName: "nic1",
autoGen: false,
linkAddr: linkAddr1,
- iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ iidOpts: ipv6.OpaqueInterfaceIdentifierOptions{
NICNameFromID: nicNameFunc,
SecretKey: secretKey[:],
},
@@ -2279,7 +2302,7 @@ func TestNICAutoGenLinkLocalAddr(t *testing.T) {
nicName: "nic1",
autoGen: true,
linkAddr: linkAddr1,
- iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ iidOpts: ipv6.OpaqueInterfaceIdentifierOptions{
NICNameFromID: nicNameFunc,
SecretKey: secretKey[:],
},
@@ -2291,7 +2314,7 @@ func TestNICAutoGenLinkLocalAddr(t *testing.T) {
{
name: "OIID Empty MAC and empty nicName",
autoGen: true,
- iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ iidOpts: ipv6.OpaqueInterfaceIdentifierOptions{
NICNameFromID: nicNameFunc,
SecretKey: secretKey[:1],
},
@@ -2303,7 +2326,7 @@ func TestNICAutoGenLinkLocalAddr(t *testing.T) {
nicName: "test",
autoGen: true,
linkAddr: "\x01\x02\x03",
- iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ iidOpts: ipv6.OpaqueInterfaceIdentifierOptions{
NICNameFromID: nicNameFunc,
SecretKey: secretKey[:2],
},
@@ -2315,7 +2338,7 @@ func TestNICAutoGenLinkLocalAddr(t *testing.T) {
nicName: "test2",
autoGen: true,
linkAddr: "\x01\x02\x03\x04\x05\x06",
- iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ iidOpts: ipv6.OpaqueInterfaceIdentifierOptions{
NICNameFromID: nicNameFunc,
SecretKey: secretKey[:3],
},
@@ -2327,7 +2350,7 @@ func TestNICAutoGenLinkLocalAddr(t *testing.T) {
nicName: "test3",
autoGen: true,
linkAddr: "\x00\x00\x00\x00\x00\x00",
- iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ iidOpts: ipv6.OpaqueInterfaceIdentifierOptions{
NICNameFromID: nicNameFunc,
},
shouldGen: true,
@@ -2341,10 +2364,11 @@ func TestNICAutoGenLinkLocalAddr(t *testing.T) {
autoGenAddrC: make(chan ndpAutoGenAddrEvent, 1),
}
opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- AutoGenIPv6LinkLocal: test.autoGen,
- NDPDisp: &ndpDisp,
- OpaqueIIDOpts: test.iidOpts,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ AutoGenIPv6LinkLocal: test.autoGen,
+ NDPDisp: &ndpDisp,
+ OpaqueIIDOpts: test.iidOpts,
+ })},
}
e := channel.New(0, 1280, test.linkAddr)
@@ -2416,15 +2440,15 @@ func TestNoLinkLocalAutoGenForLoopbackNIC(t *testing.T) {
tests := []struct {
name string
- opaqueIIDOpts stack.OpaqueInterfaceIdentifierOptions
+ opaqueIIDOpts ipv6.OpaqueInterfaceIdentifierOptions
}{
{
name: "IID From MAC",
- opaqueIIDOpts: stack.OpaqueInterfaceIdentifierOptions{},
+ opaqueIIDOpts: ipv6.OpaqueInterfaceIdentifierOptions{},
},
{
name: "Opaque IID",
- opaqueIIDOpts: stack.OpaqueInterfaceIdentifierOptions{
+ opaqueIIDOpts: ipv6.OpaqueInterfaceIdentifierOptions{
NICNameFromID: func(_ tcpip.NICID, nicName string) string {
return nicName
},
@@ -2435,9 +2459,10 @@ func TestNoLinkLocalAutoGenForLoopbackNIC(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- AutoGenIPv6LinkLocal: true,
- OpaqueIIDOpts: test.opaqueIIDOpts,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ AutoGenIPv6LinkLocal: true,
+ OpaqueIIDOpts: test.opaqueIIDOpts,
+ })},
}
e := loopback.New()
@@ -2466,12 +2491,13 @@ func TestNICAutoGenAddrDoesDAD(t *testing.T) {
ndpDisp := ndpDispatcher{
dadC: make(chan ndpDADEvent),
}
- ndpConfigs := stack.DefaultNDPConfigurations()
+ ndpConfigs := ipv6.DefaultNDPConfigurations()
opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: ndpConfigs,
- AutoGenIPv6LinkLocal: true,
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ndpConfigs,
+ AutoGenIPv6LinkLocal: true,
+ NDPDisp: &ndpDisp,
+ })},
}
e := channel.New(int(ndpConfigs.DupAddrDetectTransmits), 1280, linkAddr1)
@@ -2527,7 +2553,7 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) {
for _, ps := range pebs {
t.Run(fmt.Sprintf("%d-to-%d", pi, ps), func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
})
ep1 := channel.New(10, defaultMTU, "")
if err := s.CreateNIC(1, ep1); err != nil {
@@ -2818,14 +2844,15 @@ func TestIPv6SourceAddressSelectionScopeAndSameAddress(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- HandleRAs: true,
- AutoGenGlobalAddresses: true,
- AutoGenTempGlobalAddresses: true,
- },
- NDPDisp: &ndpDispatcher{},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ HandleRAs: true,
+ AutoGenGlobalAddresses: true,
+ AutoGenTempGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDispatcher{},
+ })},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -2874,7 +2901,7 @@ func TestAddRemoveIPv4BroadcastAddressOnNICEnableDisable(t *testing.T) {
e := loopback.New()
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
})
nicOpts := stack.NICOptions{Disabled: true}
if err := s.CreateNICWithOptions(nicID, e, nicOpts); err != nil {
@@ -2926,7 +2953,7 @@ func TestLeaveIPv6SolicitedNodeAddrBeforeAddrRemoval(t *testing.T) {
const nicID = 1
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol},
})
e := channel.New(10, 1280, linkAddr1)
if err := s.CreateNIC(1, e); err != nil {
@@ -2987,7 +3014,7 @@ func TestJoinLeaveMulticastOnNICEnableDisable(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
e := loopback.New()
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
})
nicOpts := stack.NICOptions{Disabled: true}
if err := s.CreateNICWithOptions(nicID, e, nicOpts); err != nil {
@@ -3064,12 +3091,13 @@ func TestDoDADWhenNICEnabled(t *testing.T) {
dadC: make(chan ndpDADEvent),
}
opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- NDPConfigs: stack.NDPConfigurations{
- DupAddrDetectTransmits: dadTransmits,
- RetransmitTimer: retransmitTimer,
- },
- NDPDisp: &ndpDisp,
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+ NDPConfigs: ipv6.NDPConfigurations{
+ DupAddrDetectTransmits: dadTransmits,
+ RetransmitTimer: retransmitTimer,
+ },
+ NDPDisp: &ndpDisp,
+ })},
}
e := channel.New(dadTransmits, 1280, linkAddr1)
@@ -3428,7 +3456,7 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
})
ep := channel.New(0, defaultMTU, "")
if err := s.CreateNIC(nicID1, ep); err != nil {
@@ -3466,7 +3494,7 @@ func TestResolveWith(t *testing.T) {
)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), arp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, arp.NewProtocol},
})
ep := channel.New(0, defaultMTU, "")
ep.LinkEPCapabilities |= stack.CapabilityResolutionRequired
@@ -3504,3 +3532,91 @@ func TestResolveWith(t *testing.T) {
t.Fatal("got r.IsResolutionRequired() = true, want = false")
}
}
+
+// TestRouteReleaseAfterAddrRemoval tests that releasing a Route after its
+// associated address is removed should not cause a panic.
+func TestRouteReleaseAfterAddrRemoval(t *testing.T) {
+ const (
+ nicID = 1
+ localAddr = tcpip.Address("\x01")
+ remoteAddr = tcpip.Address("\x02")
+ )
+
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
+ })
+
+ ep := channel.New(0, defaultMTU, "")
+ if err := s.CreateNIC(nicID, ep); err != nil {
+ t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
+ }
+ if err := s.AddAddress(nicID, fakeNetNumber, localAddr); err != nil {
+ t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID, fakeNetNumber, localAddr, err)
+ }
+ {
+ subnet, err := tcpip.NewSubnet("\x00", "\x00")
+ if err != nil {
+ t.Fatal(err)
+ }
+ s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+ }
+
+ r, err := s.FindRoute(nicID, localAddr, remoteAddr, fakeNetNumber, false /* multicastLoop */)
+ if err != nil {
+ t.Fatalf("s.FindRoute(%d, %s, %s, %d, false): %s", nicID, localAddr, remoteAddr, fakeNetNumber, err)
+ }
+ // Should not panic.
+ defer r.Release()
+
+ // Check that removing the same address fails.
+ if err := s.RemoveAddress(nicID, localAddr); err != nil {
+ t.Fatalf("s.RemoveAddress(%d, %s): %s", nicID, localAddr, err)
+ }
+}
+
+func TestGetNetworkEndpoint(t *testing.T) {
+ const nicID = 1
+
+ tests := []struct {
+ name string
+ protoFactory stack.NetworkProtocolFactory
+ protoNum tcpip.NetworkProtocolNumber
+ }{
+ {
+ name: "IPv4",
+ protoFactory: ipv4.NewProtocol,
+ protoNum: ipv4.ProtocolNumber,
+ },
+ {
+ name: "IPv6",
+ protoFactory: ipv6.NewProtocol,
+ protoNum: ipv6.ProtocolNumber,
+ },
+ }
+
+ factories := make([]stack.NetworkProtocolFactory, 0, len(tests))
+ for _, test := range tests {
+ factories = append(factories, test.protoFactory)
+ }
+
+ s := stack.New(stack.Options{
+ NetworkProtocols: factories,
+ })
+
+ if err := s.CreateNIC(nicID, channel.New(0, defaultMTU, "")); err != nil {
+ t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ ep, err := s.GetNetworkEndpoint(nicID, test.protoNum)
+ if err != nil {
+ t.Fatalf("s.GetNetworkEndpoint(%d, %d): %s", nicID, test.protoNum, err)
+ }
+
+ if got := ep.NetworkProtocolNumber(); got != test.protoNum {
+ t.Fatalf("got ep.NetworkProtocolNumber() = %d, want = %d", got, test.protoNum)
+ }
+ })
+ }
+}
diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go
index 0774b5382..4ebc30ca8 100644
--- a/pkg/tcpip/stack/transport_demuxer.go
+++ b/pkg/tcpip/stack/transport_demuxer.go
@@ -155,7 +155,7 @@ func (epsByNIC *endpointsByNIC) transportEndpoints() []TransportEndpoint {
func (epsByNIC *endpointsByNIC) handlePacket(r *Route, id TransportEndpointID, pkt *PacketBuffer) {
epsByNIC.mu.RLock()
- mpep, ok := epsByNIC.endpoints[r.ref.nic.ID()]
+ mpep, ok := epsByNIC.endpoints[r.nic.ID()]
if !ok {
if mpep, ok = epsByNIC.endpoints[0]; !ok {
epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
@@ -626,7 +626,7 @@ func (d *transportDemuxer) findTransportEndpoint(netProto tcpip.NetworkProtocolN
epsByNIC.mu.RLock()
eps.mu.RUnlock()
- mpep, ok := epsByNIC.endpoints[r.ref.nic.ID()]
+ mpep, ok := epsByNIC.endpoints[r.nic.ID()]
if !ok {
if mpep, ok = epsByNIC.endpoints[0]; !ok {
epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
diff --git a/pkg/tcpip/stack/transport_demuxer_test.go b/pkg/tcpip/stack/transport_demuxer_test.go
index 4d6d62eec..698c8609e 100644
--- a/pkg/tcpip/stack/transport_demuxer_test.go
+++ b/pkg/tcpip/stack/transport_demuxer_test.go
@@ -51,8 +51,8 @@ type testContext struct {
// newDualTestContextMultiNIC creates the testing context and also linkEpIDs NICs.
func newDualTestContextMultiNIC(t *testing.T, mtu uint32, linkEpIDs []tcpip.NICID) *testContext {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
linkEps := make(map[tcpip.NICID]*channel.Endpoint)
for _, linkEpID := range linkEpIDs {
@@ -182,8 +182,8 @@ func TestTransportDemuxerRegister(t *testing.T) {
} {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
var wq waiter.Queue
ep, err := s.NewEndpoint(udp.ProtocolNumber, ipv4.ProtocolNumber, &wq)
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index cbb34d224..62ab6d92f 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -39,7 +39,7 @@ const (
// use it.
type fakeTransportEndpoint struct {
stack.TransportEndpointInfo
- stack *stack.Stack
+
proto *fakeTransportProtocol
peerAddr tcpip.Address
route stack.Route
@@ -59,8 +59,8 @@ func (*fakeTransportEndpoint) Stats() tcpip.EndpointStats {
func (*fakeTransportEndpoint) SetOwner(owner tcpip.PacketOwner) {}
-func newFakeTransportEndpoint(s *stack.Stack, proto *fakeTransportProtocol, netProto tcpip.NetworkProtocolNumber, uniqueID uint64) tcpip.Endpoint {
- return &fakeTransportEndpoint{stack: s, TransportEndpointInfo: stack.TransportEndpointInfo{NetProto: netProto}, proto: proto, uniqueID: uniqueID}
+func newFakeTransportEndpoint(proto *fakeTransportProtocol, netProto tcpip.NetworkProtocolNumber, uniqueID uint64) tcpip.Endpoint {
+ return &fakeTransportEndpoint{TransportEndpointInfo: stack.TransportEndpointInfo{NetProto: netProto}, proto: proto, uniqueID: uniqueID}
}
func (f *fakeTransportEndpoint) Abort() {
@@ -143,7 +143,7 @@ func (f *fakeTransportEndpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
f.peerAddr = addr.Addr
// Find the route.
- r, err := f.stack.FindRoute(addr.NIC, "", addr.Addr, fakeNetNumber, false /* multicastLoop */)
+ r, err := f.proto.stack.FindRoute(addr.NIC, "", addr.Addr, fakeNetNumber, false /* multicastLoop */)
if err != nil {
return tcpip.ErrNoRoute
}
@@ -151,7 +151,7 @@ func (f *fakeTransportEndpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
// Try to register so that we can start receiving packets.
f.ID.RemoteAddress = addr.Addr
- err = f.stack.RegisterTransportEndpoint(0, []tcpip.NetworkProtocolNumber{fakeNetNumber}, fakeTransNumber, f.ID, f, ports.Flags{}, 0 /* bindToDevice */)
+ err = f.proto.stack.RegisterTransportEndpoint(0, []tcpip.NetworkProtocolNumber{fakeNetNumber}, fakeTransNumber, f.ID, f, ports.Flags{}, 0 /* bindToDevice */)
if err != nil {
return err
}
@@ -190,7 +190,7 @@ func (f *fakeTransportEndpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *wai
}
func (f *fakeTransportEndpoint) Bind(a tcpip.FullAddress) *tcpip.Error {
- if err := f.stack.RegisterTransportEndpoint(
+ if err := f.proto.stack.RegisterTransportEndpoint(
a.NIC,
[]tcpip.NetworkProtocolNumber{fakeNetNumber},
fakeTransNumber,
@@ -218,7 +218,6 @@ func (f *fakeTransportEndpoint) HandlePacket(r *stack.Route, id stack.TransportE
f.proto.packetCount++
if f.acceptQueue != nil {
f.acceptQueue = append(f.acceptQueue, fakeTransportEndpoint{
- stack: f.stack,
TransportEndpointInfo: stack.TransportEndpointInfo{
ID: f.ID,
NetProto: f.NetProto,
@@ -262,6 +261,8 @@ type fakeTransportProtocolOptions struct {
// fakeTransportProtocol is a transport-layer protocol descriptor. It
// aggregates the number of packets received via endpoints of this protocol.
type fakeTransportProtocol struct {
+ stack *stack.Stack
+
packetCount int
controlCount int
opts fakeTransportProtocolOptions
@@ -271,11 +272,11 @@ func (*fakeTransportProtocol) Number() tcpip.TransportProtocolNumber {
return fakeTransNumber
}
-func (f *fakeTransportProtocol) NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, _ *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
- return newFakeTransportEndpoint(stack, f, netProto, stack.UniqueID()), nil
+func (f *fakeTransportProtocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, _ *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+ return newFakeTransportEndpoint(f, netProto, f.stack.UniqueID()), nil
}
-func (*fakeTransportProtocol) NewRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, _ *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+func (*fakeTransportProtocol) NewRawEndpoint(tcpip.NetworkProtocolNumber, *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
return nil, tcpip.ErrUnknownProtocol
}
@@ -326,15 +327,15 @@ func (*fakeTransportProtocol) Parse(pkt *stack.PacketBuffer) bool {
return ok
}
-func fakeTransFactory() stack.TransportProtocol {
- return &fakeTransportProtocol{}
+func fakeTransFactory(s *stack.Stack) stack.TransportProtocol {
+ return &fakeTransportProtocol{stack: s}
}
func TestTransportReceive(t *testing.T) {
linkEP := channel.New(10, defaultMTU, "")
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
- TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
+ TransportProtocols: []stack.TransportProtocolFactory{fakeTransFactory},
})
if err := s.CreateNIC(1, linkEP); err != nil {
t.Fatalf("CreateNIC failed: %v", err)
@@ -404,8 +405,8 @@ func TestTransportReceive(t *testing.T) {
func TestTransportControlReceive(t *testing.T) {
linkEP := channel.New(10, defaultMTU, "")
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
- TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
+ TransportProtocols: []stack.TransportProtocolFactory{fakeTransFactory},
})
if err := s.CreateNIC(1, linkEP); err != nil {
t.Fatalf("CreateNIC failed: %v", err)
@@ -481,8 +482,8 @@ func TestTransportControlReceive(t *testing.T) {
func TestTransportSend(t *testing.T) {
linkEP := channel.New(10, defaultMTU, "")
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
- TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
+ TransportProtocols: []stack.TransportProtocolFactory{fakeTransFactory},
})
if err := s.CreateNIC(1, linkEP); err != nil {
t.Fatalf("CreateNIC failed: %v", err)
@@ -527,8 +528,8 @@ func TestTransportSend(t *testing.T) {
func TestTransportOptions(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
- TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
+ TransportProtocols: []stack.TransportProtocolFactory{fakeTransFactory},
})
v := tcpip.TCPModerateReceiveBufferOption(true)
@@ -546,8 +547,8 @@ func TestTransportOptions(t *testing.T) {
func TestTransportForwarding(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
- TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
+ TransportProtocols: []stack.TransportProtocolFactory{fakeTransFactory},
})
s.SetForwarding(fakeNetNumber, true)
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index fa73cfa47..0d4a803ff 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -237,6 +237,14 @@ type Timer interface {
// network node. Or, in the case of unix endpoints, it may represent a path.
type Address string
+// WithPrefix returns the address with a prefix that represents a point subnet.
+func (a Address) WithPrefix() AddressWithPrefix {
+ return AddressWithPrefix{
+ Address: a,
+ PrefixLen: len(a) * 8,
+ }
+}
+
// AddressMask is a bitmask for an address.
type AddressMask string
@@ -1987,14 +1995,3 @@ func DeleteDanglingEndpoint(e Endpoint) {
// AsyncLoading is the global barrier for asynchronous endpoint loading
// activities.
var AsyncLoading sync.WaitGroup
-
-// ICMPReason is a marker interface for network protocol agnostic ICMP errors.
-type ICMPReason interface {
- isICMP()
-}
-
-// ICMPReasonPortUnreachable is an error where the transport protocol has no
-// listener and no alternative means to inform the sender.
-type ICMPReasonPortUnreachable struct{}
-
-func (*ICMPReasonPortUnreachable) isICMP() {}
diff --git a/pkg/tcpip/tests/integration/loopback_test.go b/pkg/tcpip/tests/integration/loopback_test.go
index fecbe7ba7..f35dcc084 100644
--- a/pkg/tcpip/tests/integration/loopback_test.go
+++ b/pkg/tcpip/tests/integration/loopback_test.go
@@ -120,8 +120,8 @@ func TestLoopbackAcceptAllInSubnet(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
if err := s.CreateNIC(nicID, loopback.New()); err != nil {
t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
@@ -203,7 +203,7 @@ func TestLoopbackSubnetLifetimeBoundToAddr(t *testing.T) {
otherAddr := tcpip.Address(addrBytes)
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
})
if err := s.CreateNIC(nicID, loopback.New()); err != nil {
t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err)
diff --git a/pkg/tcpip/tests/integration/multicast_broadcast_test.go b/pkg/tcpip/tests/integration/multicast_broadcast_test.go
index 659acbc7a..72d86b5ab 100644
--- a/pkg/tcpip/tests/integration/multicast_broadcast_test.go
+++ b/pkg/tcpip/tests/integration/multicast_broadcast_test.go
@@ -140,11 +140,9 @@ func TestPingMulticastBroadcast(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- ipv4Proto := ipv4.NewProtocol()
- ipv6Proto := ipv6.NewProtocol()
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4Proto, ipv6Proto},
- TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol4(), icmp.NewProtocol6()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol4, icmp.NewProtocol6},
})
// We only expect a single packet in response to our ICMP Echo Request.
e := channel.New(1, defaultMTU, "")
@@ -176,18 +174,18 @@ func TestPingMulticastBroadcast(t *testing.T) {
var rxICMP func(*channel.Endpoint, tcpip.Address)
var expectedSrc tcpip.Address
var expectedDst tcpip.Address
- var proto stack.NetworkProtocol
+ var protoNum tcpip.NetworkProtocolNumber
switch l := len(test.dstAddr); l {
case header.IPv4AddressSize:
rxICMP = rxIPv4ICMP
expectedSrc = ipv4Addr.Address
expectedDst = remoteIPv4Addr
- proto = ipv4Proto
+ protoNum = header.IPv4ProtocolNumber
case header.IPv6AddressSize:
rxICMP = rxIPv6ICMP
expectedSrc = ipv6Addr.Address
expectedDst = remoteIPv6Addr
- proto = ipv6Proto
+ protoNum = header.IPv6ProtocolNumber
default:
t.Fatalf("got unexpected address length = %d bytes", l)
}
@@ -205,7 +203,7 @@ func TestPingMulticastBroadcast(t *testing.T) {
t.Errorf("got pkt.Route.RemoteAddress = %s, want = %s", pkt.Route.RemoteAddress, expectedDst)
}
- src, dst := proto.ParseAddresses(pkt.Pkt.NetworkHeader().View())
+ src, dst := s.NetworkProtocolInstance(protoNum).ParseAddresses(pkt.Pkt.NetworkHeader().View())
if src != expectedSrc {
t.Errorf("got pkt source = %s, want = %s", src, expectedSrc)
}
@@ -380,8 +378,8 @@ func TestIncomingMulticastAndBroadcast(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
e := channel.New(0, defaultMTU, "")
if err := s.CreateNIC(nicID, e); err != nil {
@@ -466,8 +464,8 @@ func TestReuseAddrAndBroadcast(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
if err := s.CreateNIC(nicID, loopback.New()); err != nil {
t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
diff --git a/pkg/tcpip/transport/icmp/protocol.go b/pkg/tcpip/transport/icmp/protocol.go
index 941c3c08d..87d510f96 100644
--- a/pkg/tcpip/transport/icmp/protocol.go
+++ b/pkg/tcpip/transport/icmp/protocol.go
@@ -13,12 +13,7 @@
// limitations under the License.
// Package icmp contains the implementation of the ICMP and IPv6-ICMP transport
-// protocols for use in ping. To use it in the networking stack, this package
-// must be added to the project, and activated on the stack by passing
-// icmp.NewProtocol4() and/or icmp.NewProtocol6() as one of the transport
-// protocols when calling stack.New(). Then endpoints can be created by passing
-// icmp.ProtocolNumber or icmp.ProtocolNumber6 as the transport protocol number
-// when calling Stack.NewEndpoint().
+// protocols for use in ping.
package icmp
import (
@@ -42,6 +37,8 @@ const (
// protocol implements stack.TransportProtocol.
type protocol struct {
+ stack *stack.Stack
+
number tcpip.TransportProtocolNumber
}
@@ -62,20 +59,20 @@ func (p *protocol) netProto() tcpip.NetworkProtocolNumber {
// NewEndpoint creates a new icmp endpoint. It implements
// stack.TransportProtocol.NewEndpoint.
-func (p *protocol) NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
if netProto != p.netProto() {
return nil, tcpip.ErrUnknownProtocol
}
- return newEndpoint(stack, netProto, p.number, waiterQueue)
+ return newEndpoint(p.stack, netProto, p.number, waiterQueue)
}
// NewRawEndpoint creates a new raw icmp endpoint. It implements
// stack.TransportProtocol.NewRawEndpoint.
-func (p *protocol) NewRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+func (p *protocol) NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
if netProto != p.netProto() {
return nil, tcpip.ErrUnknownProtocol
}
- return raw.NewEndpoint(stack, netProto, p.number, waiterQueue)
+ return raw.NewEndpoint(p.stack, netProto, p.number, waiterQueue)
}
// MinimumPacketSize returns the minimum valid icmp packet size.
@@ -135,11 +132,11 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
}
// NewProtocol4 returns an ICMPv4 transport protocol.
-func NewProtocol4() stack.TransportProtocol {
- return &protocol{ProtocolNumber4}
+func NewProtocol4(s *stack.Stack) stack.TransportProtocol {
+ return &protocol{stack: s, number: ProtocolNumber4}
}
// NewProtocol6 returns an ICMPv6 transport protocol.
-func NewProtocol6() stack.TransportProtocol {
- return &protocol{ProtocolNumber6}
+func NewProtocol6(s *stack.Stack) stack.TransportProtocol {
+ return &protocol{stack: s, number: ProtocolNumber6}
}
diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD
index 4778e7b1c..518449602 100644
--- a/pkg/tcpip/transport/tcp/BUILD
+++ b/pkg/tcpip/transport/tcp/BUILD
@@ -94,6 +94,7 @@ go_test(
shard_count = 10,
deps = [
":tcp",
+ "//pkg/rand",
"//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 881752371..6891fd245 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -898,7 +898,7 @@ func (e *endpoint) makeOptions(sackBlocks []header.SACKBlock) []byte {
// sendRaw sends a TCP segment to the endpoint's peer.
func (e *endpoint) sendRaw(data buffer.VectorisedView, flags byte, seq, ack seqnum.Value, rcvWnd seqnum.Size) *tcpip.Error {
var sackBlocks []header.SACKBlock
- if e.EndpointState() == StateEstablished && e.rcv.pendingBufSize > 0 && (flags&header.TCPFlagAck != 0) {
+ if e.EndpointState() == StateEstablished && e.rcv.pendingRcvdSegments.Len() > 0 && (flags&header.TCPFlagAck != 0) {
sackBlocks = e.sack.Blocks[:e.sack.NumBlocks]
}
options := e.makeOptions(sackBlocks)
@@ -1003,9 +1003,8 @@ func (e *endpoint) transitionToStateEstablishedLocked(h *handshake) {
// (indicated by a negative send window scale).
e.snd = newSender(e, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale)
- rcvBufSize := seqnum.Size(e.receiveBufferSize())
e.rcvListMu.Lock()
- e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale(), rcvBufSize)
+ e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale())
// Bootstrap the auto tuning algorithm. Starting at zero will
// result in a really large receive window after the first auto
// tuning adjustment.
@@ -1136,12 +1135,11 @@ func (e *endpoint) handleSegments(fastPath bool) *tcpip.Error {
}
cont, err := e.handleSegment(s)
+ s.decRef()
if err != nil {
- s.decRef()
return err
}
if !cont {
- s.decRef()
return nil
}
}
@@ -1221,6 +1219,12 @@ func (e *endpoint) handleSegment(s *segment) (cont bool, err *tcpip.Error) {
return true, nil
}
+ // Increase counter if after processing the segment we would potentially
+ // advertise a zero window.
+ if crossed, above := e.windowCrossedACKThresholdLocked(-s.segMemSize()); crossed && !above {
+ e.stats.ReceiveErrors.ZeroRcvWindowState.Increment()
+ }
+
// Now check if the received segment has caused us to transition
// to a CLOSED state, if yes then terminate processing and do
// not invoke the sender.
@@ -1233,7 +1237,6 @@ func (e *endpoint) handleSegment(s *segment) (cont bool, err *tcpip.Error) {
// or a notification from the protocolMainLoop (caller goroutine).
// This means that with this return, the segment dequeue below can
// never occur on a closed endpoint.
- s.decRef()
return false, nil
}
@@ -1425,10 +1428,6 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
e.rcv.nonZeroWindow()
}
- if n&notifyReceiveWindowChanged != 0 {
- e.rcv.pendingBufSize = seqnum.Size(e.receiveBufferSize())
- }
-
if n&notifyMTUChanged != 0 {
e.sndBufMu.Lock()
count := e.packetTooBigCount
diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go
index 94207c141..560b4904c 100644
--- a/pkg/tcpip/transport/tcp/dual_stack_test.go
+++ b/pkg/tcpip/transport/tcp/dual_stack_test.go
@@ -78,8 +78,8 @@ func testV4Connect(t *testing.T, c *context.Context, checkers ...checker.Network
ackCheckers := append(checkers, checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(iss)+1),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(iss)+1),
))
checker.IPv4(t, c.GetPacket(), ackCheckers...)
@@ -185,8 +185,8 @@ func testV6Connect(t *testing.T, c *context.Context, checkers ...checker.Network
ackCheckers := append(checkers, checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(iss)+1),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(iss)+1),
))
checker.IPv6(t, c.GetV6Packet(), ackCheckers...)
@@ -283,7 +283,7 @@ func TestV4RefuseOnV6Only(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck),
- checker.AckNum(uint32(irs)+1),
+ checker.TCPAckNum(uint32(irs)+1),
),
)
}
@@ -319,7 +319,7 @@ func TestV6RefuseOnBoundToV4Mapped(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck),
- checker.AckNum(uint32(irs)+1),
+ checker.TCPAckNum(uint32(irs)+1),
),
)
}
@@ -352,7 +352,7 @@ func testV4Accept(t *testing.T, c *context.Context) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagSyn),
- checker.AckNum(uint32(irs)+1),
+ checker.TCPAckNum(uint32(irs)+1),
),
)
@@ -492,7 +492,7 @@ func TestV6AcceptOnV6(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagSyn),
- checker.AckNum(uint32(irs)+1),
+ checker.TCPAckNum(uint32(irs)+1),
),
)
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 120483838..7ad894840 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -63,6 +63,17 @@ const (
StateClosing
)
+const (
+ // rcvAdvWndScale is used to split the available socket buffer into
+ // application buffer and the window to be advertised to the peer. This is
+ // currently hard coded to split the available space equally.
+ rcvAdvWndScale = 1
+
+ // SegOverheadFactor is used to multiply the value provided by the
+ // user on a SetSockOpt for setting the socket send/receive buffer sizes.
+ SegOverheadFactor = 2
+)
+
// connected returns true when s is one of the states representing an
// endpoint connected to a peer.
func (s EndpointState) connected() bool {
@@ -149,7 +160,6 @@ func (s EndpointState) String() string {
// Reasons for notifying the protocol goroutine.
const (
notifyNonZeroReceiveWindow = 1 << iota
- notifyReceiveWindowChanged
notifyClose
notifyMTUChanged
notifyDrain
@@ -384,13 +394,26 @@ type endpoint struct {
// to indicate to users that no more data is coming.
//
// rcvListMu can be taken after the endpoint mu below.
- rcvListMu sync.Mutex `state:"nosave"`
- rcvList segmentList `state:"wait"`
- rcvClosed bool
- rcvBufSize int
+ rcvListMu sync.Mutex `state:"nosave"`
+ rcvList segmentList `state:"wait"`
+ rcvClosed bool
+ // rcvBufSize is the total size of the receive buffer.
+ rcvBufSize int
+ // rcvBufUsed is the actual number of payload bytes held in the receive buffer
+ // not counting any overheads of the segments itself. NOTE: This will always
+ // be strictly <= rcvMemUsed below.
rcvBufUsed int
rcvAutoParams rcvBufAutoTuneParams
+ // rcvMemUsed tracks the total amount of memory in use by received segments
+ // held in rcvList, pendingRcvdSegments and the segment queue. This is used to
+ // compute the window and the actual available buffer space. This is distinct
+ // from rcvBufUsed above which is the actual number of payload bytes held in
+ // the buffer not including any segment overheads.
+ //
+ // rcvMemUsed must be accessed atomically.
+ rcvMemUsed int32
+
// mu protects all endpoint fields unless documented otherwise. mu must
// be acquired before interacting with the endpoint fields.
mu sync.Mutex `state:"nosave"`
@@ -891,7 +914,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
e.probe = p
}
- e.segmentQueue.setLimit(MaxUnprocessedSegments)
+ e.segmentQueue.ep = e
e.tsOffset = timeStampOffset()
e.acceptCond = sync.NewCond(&e.acceptMu)
@@ -904,7 +927,12 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
result := waiter.EventMask(0)
switch e.EndpointState() {
- case StateInitial, StateBound, StateConnecting, StateSynSent, StateSynRecv:
+ case StateInitial, StateBound:
+ // This prevents blocking of new sockets which are not
+ // connected when SO_LINGER is set.
+ result |= waiter.EventHUp
+
+ case StateConnecting, StateSynSent, StateSynRecv:
// Ready for nothing.
case StateClose, StateError, StateTimeWait:
@@ -1075,6 +1103,8 @@ func (e *endpoint) closeNoShutdownLocked() {
e.notifyProtocolGoroutine(notifyClose)
} else {
e.transitionToStateCloseLocked()
+ // Notify that the endpoint is closed.
+ e.waiterQueue.Notify(waiter.EventHUp)
}
}
@@ -1129,10 +1159,16 @@ func (e *endpoint) cleanupLocked() {
tcpip.DeleteDanglingEndpoint(e)
}
+// wndFromSpace returns the window that we can advertise based on the available
+// receive buffer space.
+func wndFromSpace(space int) int {
+ return space / (1 << rcvAdvWndScale)
+}
+
// initialReceiveWindow returns the initial receive window to advertise in the
// SYN/SYN-ACK.
func (e *endpoint) initialReceiveWindow() int {
- rcvWnd := e.receiveBufferAvailable()
+ rcvWnd := wndFromSpace(e.receiveBufferAvailable())
if rcvWnd > math.MaxUint16 {
rcvWnd = math.MaxUint16
}
@@ -1209,14 +1245,12 @@ func (e *endpoint) ModerateRecvBuf(copied int) {
// reject valid data that might already be in flight as the
// acceptable window will shrink.
if rcvWnd > e.rcvBufSize {
- availBefore := e.receiveBufferAvailableLocked()
+ availBefore := wndFromSpace(e.receiveBufferAvailableLocked())
e.rcvBufSize = rcvWnd
- availAfter := e.receiveBufferAvailableLocked()
- mask := uint32(notifyReceiveWindowChanged)
+ availAfter := wndFromSpace(e.receiveBufferAvailableLocked())
if crossed, above := e.windowCrossedACKThresholdLocked(availAfter - availBefore); crossed && above {
- mask |= notifyNonZeroReceiveWindow
+ e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow)
}
- e.notifyProtocolGoroutine(mask)
}
// We only update prevCopied when we grow the buffer because in cases
@@ -1293,18 +1327,22 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) {
v := views[s.viewToDeliver]
s.viewToDeliver++
+ var delta int
if s.viewToDeliver >= len(views) {
e.rcvList.Remove(s)
+ // We only free up receive buffer space when the segment is released as the
+ // segment is still holding on to the views even though some views have been
+ // read out to the user.
+ delta = s.segMemSize()
s.decRef()
}
e.rcvBufUsed -= len(v)
-
// If the window was small before this read and if the read freed up
// enough buffer space, to either fit an aMSS or half a receive buffer
// (whichever smaller), then notify the protocol goroutine to send a
// window update.
- if crossed, above := e.windowCrossedACKThresholdLocked(len(v)); crossed && above {
+ if crossed, above := e.windowCrossedACKThresholdLocked(delta); crossed && above {
e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow)
}
@@ -1481,11 +1519,11 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro
}
// windowCrossedACKThresholdLocked checks if the receive window to be announced
-// now would be under aMSS or under half receive buffer, whichever smaller. This
-// is useful as a receive side silly window syndrome prevention mechanism. If
-// window grows to reasonable value, we should send ACK to the sender to inform
-// the rx space is now large. We also want ensure a series of small read()'s
-// won't trigger a flood of spurious tiny ACK's.
+// would be under aMSS or under the window derived from half receive buffer,
+// whichever smaller. This is useful as a receive side silly window syndrome
+// prevention mechanism. If window grows to reasonable value, we should send ACK
+// to the sender to inform the rx space is now large. We also want ensure a
+// series of small read()'s won't trigger a flood of spurious tiny ACK's.
//
// For large receive buffers, the threshold is aMSS - once reader reads more
// than aMSS we'll send ACK. For tiny receive buffers, the threshold is half of
@@ -1496,17 +1534,18 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro
//
// Precondition: e.mu and e.rcvListMu must be held.
func (e *endpoint) windowCrossedACKThresholdLocked(deltaBefore int) (crossed bool, above bool) {
- newAvail := e.receiveBufferAvailableLocked()
+ newAvail := wndFromSpace(e.receiveBufferAvailableLocked())
oldAvail := newAvail - deltaBefore
if oldAvail < 0 {
oldAvail = 0
}
-
threshold := int(e.amss)
- if threshold > e.rcvBufSize/2 {
- threshold = e.rcvBufSize / 2
+ // rcvBufFraction is the inverse of the fraction of receive buffer size that
+ // is used to decide if the available buffer space is now above it.
+ const rcvBufFraction = 2
+ if wndThreshold := wndFromSpace(e.rcvBufSize / rcvBufFraction); threshold > wndThreshold {
+ threshold = wndThreshold
}
-
switch {
case oldAvail < threshold && newAvail >= threshold:
return true, true
@@ -1636,17 +1675,23 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
// Make sure the receive buffer size is within the min and max
// allowed.
var rs tcpip.TCPReceiveBufferSizeRangeOption
- if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
+ if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil {
+ panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %#v) = %s", ProtocolNumber, &rs, err))
+ }
+
+ if v > rs.Max {
+ v = rs.Max
+ }
+
+ if v < math.MaxInt32/SegOverheadFactor {
+ v *= SegOverheadFactor
if v < rs.Min {
v = rs.Min
}
- if v > rs.Max {
- v = rs.Max
- }
+ } else {
+ v = math.MaxInt32
}
- mask := uint32(notifyReceiveWindowChanged)
-
e.LockUser()
e.rcvListMu.Lock()
@@ -1660,14 +1705,9 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
v = 1 << scale
}
- // Make sure 2*size doesn't overflow.
- if v > math.MaxInt32/2 {
- v = math.MaxInt32 / 2
- }
-
- availBefore := e.receiveBufferAvailableLocked()
+ availBefore := wndFromSpace(e.receiveBufferAvailableLocked())
e.rcvBufSize = v
- availAfter := e.receiveBufferAvailableLocked()
+ availAfter := wndFromSpace(e.receiveBufferAvailableLocked())
e.rcvAutoParams.disabled = true
@@ -1675,24 +1715,31 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
// syndrome prevetion, when our available space grows above aMSS
// or half receive buffer, whichever smaller.
if crossed, above := e.windowCrossedACKThresholdLocked(availAfter - availBefore); crossed && above {
- mask |= notifyNonZeroReceiveWindow
+ e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow)
}
e.rcvListMu.Unlock()
e.UnlockUser()
- e.notifyProtocolGoroutine(mask)
case tcpip.SendBufferSizeOption:
// Make sure the send buffer size is within the min and max
// allowed.
var ss tcpip.TCPSendBufferSizeRangeOption
- if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
+ if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err != nil {
+ panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %#v) = %s", ProtocolNumber, &ss, err))
+ }
+
+ if v > ss.Max {
+ v = ss.Max
+ }
+
+ if v < math.MaxInt32/SegOverheadFactor {
+ v *= SegOverheadFactor
if v < ss.Min {
v = ss.Min
}
- if v > ss.Max {
- v = ss.Max
- }
+ } else {
+ v = math.MaxInt32
}
e.sndBufMu.Lock()
@@ -2699,13 +2746,8 @@ func (e *endpoint) updateSndBufferUsage(v int) {
func (e *endpoint) readyToRead(s *segment) {
e.rcvListMu.Lock()
if s != nil {
+ e.rcvBufUsed += s.payloadSize()
s.incRef()
- e.rcvBufUsed += s.data.Size()
- // Increase counter if the receive window falls down below MSS
- // or half receive buffer size, whichever smaller.
- if crossed, above := e.windowCrossedACKThresholdLocked(-s.data.Size()); crossed && !above {
- e.stats.ReceiveErrors.ZeroRcvWindowState.Increment()
- }
e.rcvList.PushBack(s)
} else {
e.rcvClosed = true
@@ -2720,15 +2762,17 @@ func (e *endpoint) readyToRead(s *segment) {
func (e *endpoint) receiveBufferAvailableLocked() int {
// We may use more bytes than the buffer size when the receive buffer
// shrinks.
- if e.rcvBufUsed >= e.rcvBufSize {
+ memUsed := e.receiveMemUsed()
+ if memUsed >= e.rcvBufSize {
return 0
}
- return e.rcvBufSize - e.rcvBufUsed
+ return e.rcvBufSize - memUsed
}
// receiveBufferAvailable calculates how many bytes are still available in the
-// receive buffer.
+// receive buffer based on the actual memory used by all segments held in
+// receive buffer/pending and segment queue.
func (e *endpoint) receiveBufferAvailable() int {
e.rcvListMu.Lock()
available := e.receiveBufferAvailableLocked()
@@ -2736,14 +2780,35 @@ func (e *endpoint) receiveBufferAvailable() int {
return available
}
+// receiveBufferUsed returns the amount of in-use receive buffer.
+func (e *endpoint) receiveBufferUsed() int {
+ e.rcvListMu.Lock()
+ used := e.rcvBufUsed
+ e.rcvListMu.Unlock()
+ return used
+}
+
+// receiveBufferSize returns the current size of the receive buffer.
func (e *endpoint) receiveBufferSize() int {
e.rcvListMu.Lock()
size := e.rcvBufSize
e.rcvListMu.Unlock()
-
return size
}
+// receiveMemUsed returns the total memory in use by segments held by this
+// endpoint.
+func (e *endpoint) receiveMemUsed() int {
+ return int(atomic.LoadInt32(&e.rcvMemUsed))
+}
+
+// updateReceiveMemUsed adds the provided delta to e.rcvMemUsed.
+func (e *endpoint) updateReceiveMemUsed(delta int) {
+ atomic.AddInt32(&e.rcvMemUsed, int32(delta))
+}
+
+// maxReceiveBufferSize returns the stack wide maximum receive buffer size for
+// an endpoint.
func (e *endpoint) maxReceiveBufferSize() int {
var rs tcpip.TCPReceiveBufferSizeRangeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil {
@@ -2894,7 +2959,6 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
RcvAcc: e.rcv.rcvAcc,
RcvWndScale: e.rcv.rcvWndScale,
PendingBufUsed: e.rcv.pendingBufUsed,
- PendingBufSize: e.rcv.pendingBufSize,
}
// Copy sender state.
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index 41d0050f3..b25431467 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -44,7 +44,7 @@ func (e *endpoint) drainSegmentLocked() {
// beforeSave is invoked by stateify.
func (e *endpoint) beforeSave() {
// Stop incoming packets.
- e.segmentQueue.setLimit(0)
+ e.segmentQueue.freeze()
e.mu.Lock()
defer e.mu.Unlock()
@@ -178,7 +178,7 @@ func (e *endpoint) afterLoad() {
// Resume implements tcpip.ResumableEndpoint.Resume.
func (e *endpoint) Resume(s *stack.Stack) {
e.stack = s
- e.segmentQueue.setLimit(MaxUnprocessedSegments)
+ e.segmentQueue.thaw()
epState := e.origEndpointState
switch epState {
case StateInitial, StateBound, StateListen, StateConnecting, StateEstablished:
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index 371067048..5bce73605 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -12,12 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package tcp contains the implementation of the TCP transport protocol. To use
-// it in the networking stack, this package must be added to the project, and
-// activated on the stack by passing tcp.NewProtocol() as one of the
-// transport protocols when calling stack.New(). Then endpoints can be created
-// by passing tcp.ProtocolNumber as the transport protocol number when calling
-// Stack.NewEndpoint().
+// Package tcp contains the implementation of the TCP transport protocol.
package tcp
import (
@@ -138,6 +133,8 @@ func (s *synRcvdCounter) Threshold() uint64 {
}
type protocol struct {
+ stack *stack.Stack
+
mu sync.RWMutex
sackEnabled bool
recovery tcpip.TCPRecovery
@@ -164,14 +161,14 @@ func (*protocol) Number() tcpip.TransportProtocolNumber {
}
// NewEndpoint creates a new tcp endpoint.
-func (p *protocol) NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
- return newEndpoint(stack, netProto, waiterQueue), nil
+func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+ return newEndpoint(p.stack, netProto, waiterQueue), nil
}
// NewRawEndpoint creates a new raw TCP endpoint. Raw TCP sockets are currently
// unsupported. It implements stack.TransportProtocol.NewRawEndpoint.
-func (p *protocol) NewRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
- return raw.NewEndpoint(stack, netProto, header.TCPProtocolNumber, waiterQueue)
+func (p *protocol) NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+ return raw.NewEndpoint(p.stack, netProto, header.TCPProtocolNumber, waiterQueue)
}
// MinimumPacketSize returns the minimum valid tcp packet size.
@@ -510,8 +507,9 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
}
// NewProtocol returns a TCP transport protocol.
-func NewProtocol() stack.TransportProtocol {
+func NewProtocol(s *stack.Stack) stack.TransportProtocol {
p := protocol{
+ stack: s,
sendBufferSize: tcpip.TCPSendBufferSizeRangeOption{
Min: MinBufferSize,
Default: DefaultSendBufferSize,
diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index cfd43b5e3..48bf196d8 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -47,22 +47,24 @@ type receiver struct {
closed bool
+ // pendingRcvdSegments is bounded by the receive buffer size of the
+ // endpoint.
pendingRcvdSegments segmentHeap
- pendingBufUsed seqnum.Size
- pendingBufSize seqnum.Size
+ // pendingBufUsed tracks the total number of bytes (including segment
+ // overhead) currently queued in pendingRcvdSegments.
+ pendingBufUsed int
// Time when the last ack was received.
lastRcvdAckTime time.Time `state:".(unixTime)"`
}
-func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8, pendingBufSize seqnum.Size) *receiver {
+func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8) *receiver {
return &receiver{
ep: ep,
rcvNxt: irs + 1,
rcvAcc: irs.Add(rcvWnd + 1),
rcvWnd: rcvWnd,
rcvWndScale: rcvWndScale,
- pendingBufSize: pendingBufSize,
lastRcvdAckTime: time.Now(),
}
}
@@ -85,15 +87,30 @@ func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool {
// getSendParams returns the parameters needed by the sender when building
// segments to send.
func (r *receiver) getSendParams() (rcvNxt seqnum.Value, rcvWnd seqnum.Size) {
- // Calculate the window size based on the available buffer space.
- receiveBufferAvailable := r.ep.receiveBufferAvailable()
- acc := r.rcvNxt.Add(seqnum.Size(receiveBufferAvailable))
- if r.rcvAcc.LessThan(acc) {
- r.rcvAcc = acc
+ avail := wndFromSpace(r.ep.receiveBufferAvailable())
+ if avail == 0 {
+ // We have no space available to accept any data, move to zero window
+ // state.
+ r.rcvWnd = 0
+ return r.rcvNxt, 0
+ }
+
+ acc := r.rcvNxt.Add(seqnum.Size(avail))
+ newWnd := r.rcvNxt.Size(acc)
+ curWnd := r.rcvNxt.Size(r.rcvAcc)
+
+ // Update rcvAcc only if new window is > previously advertised window. We
+ // should never shrink the acceptable sequence space once it has been
+ // advertised the peer. If we shrink the acceptable sequence space then we
+ // would end up dropping bytes that might already be in flight.
+ if newWnd > curWnd {
+ r.rcvAcc = r.rcvNxt.Add(newWnd)
+ } else {
+ newWnd = curWnd
}
// Stash away the non-scaled receive window as we use it for measuring
// receiver's estimated RTT.
- r.rcvWnd = r.rcvNxt.Size(r.rcvAcc)
+ r.rcvWnd = newWnd
return r.rcvNxt, r.rcvWnd >> r.rcvWndScale
}
@@ -195,7 +212,9 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum
}
for i := first; i < len(r.pendingRcvdSegments); i++ {
+ r.pendingBufUsed -= r.pendingRcvdSegments[i].segMemSize()
r.pendingRcvdSegments[i].decRef()
+
// Note that slice truncation does not allow garbage collection of
// truncated items, thus truncated items must be set to nil to avoid
// memory leaks.
@@ -384,10 +403,16 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) {
// Defer segment processing if it can't be consumed now.
if !r.consumeSegment(s, segSeq, segLen) {
if segLen > 0 || s.flagIsSet(header.TCPFlagFin) {
- // We only store the segment if it's within our buffer
- // size limit.
- if r.pendingBufUsed < r.pendingBufSize {
- r.pendingBufUsed += seqnum.Size(s.segMemSize())
+ // We only store the segment if it's within our buffer size limit.
+ //
+ // Only use 75% of the receive buffer queue for out-of-order
+ // segments. This ensures that we always leave some space for the inorder
+ // segments to arrive allowing pending segments to be processed and
+ // delivered to the user.
+ if r.ep.receiveBufferAvailable() > 0 && r.pendingBufUsed < r.ep.receiveBufferSize()>>2 {
+ r.ep.rcvListMu.Lock()
+ r.pendingBufUsed += s.segMemSize()
+ r.ep.rcvListMu.Unlock()
s.incRef()
heap.Push(&r.pendingRcvdSegments, s)
UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt)
@@ -421,7 +446,9 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) {
}
heap.Pop(&r.pendingRcvdSegments)
- r.pendingBufUsed -= seqnum.Size(s.segMemSize())
+ r.ep.rcvListMu.Lock()
+ r.pendingBufUsed -= s.segMemSize()
+ r.ep.rcvListMu.Unlock()
s.decRef()
}
return false, nil
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index 94307d31a..13acaf753 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -15,6 +15,7 @@
package tcp
import (
+ "fmt"
"sync/atomic"
"time"
@@ -24,6 +25,15 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
+// queueFlags are used to indicate which queue of an endpoint a particular segment
+// belongs to. This is used to track memory accounting correctly.
+type queueFlags uint8
+
+const (
+ recvQ queueFlags = 1 << iota
+ sendQ
+)
+
// segment represents a TCP segment. It holds the payload and parsed TCP segment
// information, and can be added to intrusive lists.
// segment is mostly immutable, the only field allowed to change is viewToDeliver.
@@ -32,6 +42,8 @@ import (
type segment struct {
segmentEntry
refCnt int32
+ ep *endpoint
+ qFlags queueFlags
id stack.TransportEndpointID `state:"manual"`
route stack.Route `state:"manual"`
data buffer.VectorisedView `state:".(buffer.VectorisedView)"`
@@ -100,6 +112,8 @@ func (s *segment) clone() *segment {
rcvdTime: s.rcvdTime,
xmitTime: s.xmitTime,
xmitCount: s.xmitCount,
+ ep: s.ep,
+ qFlags: s.qFlags,
}
t.data = s.data.Clone(t.views[:])
return t
@@ -115,8 +129,34 @@ func (s *segment) flagsAreSet(flags uint8) bool {
return s.flags&flags == flags
}
+// setOwner sets the owning endpoint for this segment. Its required
+// to be called to ensure memory accounting for receive/send buffer
+// queues is done properly.
+func (s *segment) setOwner(ep *endpoint, qFlags queueFlags) {
+ switch qFlags {
+ case recvQ:
+ ep.updateReceiveMemUsed(s.segMemSize())
+ case sendQ:
+ // no memory account for sendQ yet.
+ default:
+ panic(fmt.Sprintf("unexpected queue flag %b", qFlags))
+ }
+ s.ep = ep
+ s.qFlags = qFlags
+}
+
func (s *segment) decRef() {
if atomic.AddInt32(&s.refCnt, -1) == 0 {
+ if s.ep != nil {
+ switch s.qFlags {
+ case recvQ:
+ s.ep.updateReceiveMemUsed(-s.segMemSize())
+ case sendQ:
+ // no memory accounting for sendQ yet.
+ default:
+ panic(fmt.Sprintf("unexpected queue flag %b set for segment", s.qFlags))
+ }
+ }
s.route.Release()
}
}
@@ -138,6 +178,11 @@ func (s *segment) logicalLen() seqnum.Size {
return l
}
+// payloadSize is the size of s.data.
+func (s *segment) payloadSize() int {
+ return s.data.Size()
+}
+
// segMemSize is the amount of memory used to hold the segment data and
// the associated metadata.
func (s *segment) segMemSize() int {
diff --git a/pkg/tcpip/transport/tcp/segment_queue.go b/pkg/tcpip/transport/tcp/segment_queue.go
index 48a257137..54545a1b1 100644
--- a/pkg/tcpip/transport/tcp/segment_queue.go
+++ b/pkg/tcpip/transport/tcp/segment_queue.go
@@ -22,16 +22,16 @@ import (
//
// +stateify savable
type segmentQueue struct {
- mu sync.Mutex `state:"nosave"`
- list segmentList `state:"wait"`
- limit int
- used int
+ mu sync.Mutex `state:"nosave"`
+ list segmentList `state:"wait"`
+ ep *endpoint
+ frozen bool
}
// emptyLocked determines if the queue is empty.
// Preconditions: q.mu must be held.
func (q *segmentQueue) emptyLocked() bool {
- return q.used == 0
+ return q.list.Empty()
}
// empty determines if the queue is empty.
@@ -43,14 +43,6 @@ func (q *segmentQueue) empty() bool {
return r
}
-// setLimit updates the limit. No segments are immediately dropped in case the
-// queue becomes full due to the new limit.
-func (q *segmentQueue) setLimit(limit int) {
- q.mu.Lock()
- q.limit = limit
- q.mu.Unlock()
-}
-
// enqueue adds the given segment to the queue.
//
// Returns true when the segment is successfully added to the queue, in which
@@ -58,15 +50,23 @@ func (q *segmentQueue) setLimit(limit int) {
// false if the queue is full, in which case ownership is retained by the
// caller.
func (q *segmentQueue) enqueue(s *segment) bool {
+ // q.ep.receiveBufferParams() must be called without holding q.mu to
+ // avoid lock order inversion.
+ bufSz := q.ep.receiveBufferSize()
+ used := q.ep.receiveMemUsed()
q.mu.Lock()
- r := q.used < q.limit
- if r {
+ // Allow zero sized segments (ACK/FIN/RSTs etc even if the segment queue
+ // is currently full).
+ allow := (used <= bufSz || s.payloadSize() == 0) && !q.frozen
+
+ if allow {
q.list.PushBack(s)
- q.used++
+ // Set the owner now that the endpoint owns the segment.
+ s.setOwner(q.ep, recvQ)
}
q.mu.Unlock()
- return r
+ return allow
}
// dequeue removes and returns the next segment from queue, if one exists.
@@ -77,9 +77,25 @@ func (q *segmentQueue) dequeue() *segment {
s := q.list.Front()
if s != nil {
q.list.Remove(s)
- q.used--
}
q.mu.Unlock()
return s
}
+
+// freeze prevents any more segments from being added to the queue. i.e all
+// future segmentQueue.enqueue will return false and not add the segment to the
+// queue till the queue is unfroze with a corresponding segmentQueue.thaw call.
+func (q *segmentQueue) freeze() {
+ q.mu.Lock()
+ q.frozen = true
+ q.mu.Unlock()
+}
+
+// thaw unfreezes a previously frozen queue using segmentQueue.freeze() and
+// allows new segments to be queued again.
+func (q *segmentQueue) thaw() {
+ q.mu.Lock()
+ q.frozen = false
+ q.mu.Unlock()
+}
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 8b2217a98..5b504d0d1 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -21,6 +21,7 @@ import (
"testing"
"time"
+ "gvisor.dev/gvisor/pkg/rand"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
@@ -349,8 +350,8 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+1)),
- checker.AckNum(uint32(iss)+1),
+ checker.TCPSeqNum(uint32(c.IRS+1)),
+ checker.TCPAckNum(uint32(iss)+1),
checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck)))
finHeaders := &context.Headers{
SrcPort: context.TestPort,
@@ -380,8 +381,8 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+1)),
- checker.AckNum(0),
+ checker.TCPSeqNum(uint32(c.IRS+1)),
+ checker.TCPAckNum(0),
checker.TCPFlags(header.TCPFlagRst)))
}
@@ -479,8 +480,8 @@ func TestConnectResetAfterClose(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -521,8 +522,8 @@ func TestConnectResetAfterClose(t *testing.T) {
// RST is always generated with sndNxt which if the FIN
// has been sent will be 1 higher than the sequence number
// of the FIN itself.
- checker.SeqNum(uint32(c.IRS)+2),
- checker.AckNum(0),
+ checker.TCPSeqNum(uint32(c.IRS)+2),
+ checker.TCPAckNum(0),
checker.TCPFlags(header.TCPFlagRst),
),
)
@@ -561,8 +562,8 @@ func TestCurrentConnectedIncrement(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -597,8 +598,8 @@ func TestCurrentConnectedIncrement(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+2),
- checker.AckNum(791),
+ checker.TCPSeqNum(uint32(c.IRS)+2),
+ checker.TCPAckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -644,8 +645,8 @@ func TestClosingWithEnqueuedSegments(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(791),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -665,8 +666,8 @@ func TestClosingWithEnqueuedSegments(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(791),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(791),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -725,8 +726,8 @@ func TestClosingWithEnqueuedSegments(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+2),
- checker.AckNum(0),
+ checker.TCPSeqNum(uint32(c.IRS)+2),
+ checker.TCPAckNum(0),
checker.TCPFlags(header.TCPFlagRst),
),
)
@@ -777,8 +778,8 @@ func TestSimpleReceive(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+len(data))),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -1030,7 +1031,7 @@ func TestSendRstOnListenerRxSynAckV4(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagRst),
- checker.SeqNum(200)))
+ checker.TCPSeqNum(200)))
}
func TestSendRstOnListenerRxSynAckV6(t *testing.T) {
@@ -1058,7 +1059,7 @@ func TestSendRstOnListenerRxSynAckV6(t *testing.T) {
checker.IPv6(t, c.GetV6Packet(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagRst),
- checker.SeqNum(200)))
+ checker.TCPSeqNum(200)))
}
// TestTCPAckBeforeAcceptV4 tests that once the 3-way handshake is complete,
@@ -1095,8 +1096,8 @@ func TestTCPAckBeforeAcceptV4(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(iss+1)),
- checker.AckNum(uint32(irs+5))))
+ checker.TCPSeqNum(uint32(iss+1)),
+ checker.TCPAckNum(uint32(irs+5))))
}
// TestTCPAckBeforeAcceptV6 tests that once the 3-way handshake is complete,
@@ -1133,8 +1134,8 @@ func TestTCPAckBeforeAcceptV6(t *testing.T) {
checker.IPv6(t, c.GetV6Packet(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(iss+1)),
- checker.AckNum(uint32(irs+5))))
+ checker.TCPSeqNum(uint32(iss+1)),
+ checker.TCPAckNum(uint32(irs+5))))
}
func TestSendRstOnListenerRxAckV4(t *testing.T) {
@@ -1162,7 +1163,7 @@ func TestSendRstOnListenerRxAckV4(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagRst),
- checker.SeqNum(200)))
+ checker.TCPSeqNum(200)))
}
func TestSendRstOnListenerRxAckV6(t *testing.T) {
@@ -1190,7 +1191,7 @@ func TestSendRstOnListenerRxAckV6(t *testing.T) {
checker.IPv6(t, c.GetV6Packet(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagRst),
- checker.SeqNum(200)))
+ checker.TCPSeqNum(200)))
}
// TestListenShutdown tests for the listening endpoint replying with RST
@@ -1306,8 +1307,8 @@ func TestTOSV4(t *testing.T) {
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790), // Acknum is initial sequence number + 1
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790), // Acknum is initial sequence number + 1
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
checker.TOS(tos, 0),
@@ -1355,8 +1356,8 @@ func TestTrafficClassV6(t *testing.T) {
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
checker.TOS(tos, 0),
@@ -1546,8 +1547,8 @@ func TestOutOfOrderReceive(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -1597,8 +1598,8 @@ func TestOutOfOrderReceive(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+len(data))),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -1608,8 +1609,8 @@ func TestOutOfOrderFlood(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
- // Create a new connection with initial window size of 10.
- c.CreateConnected(789, 30000, 10)
+ rcvBufSz := math.MaxUint16
+ c.CreateConnected(789, 30000, rcvBufSz)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %s, want = %s", err, tcpip.ErrWouldBlock)
@@ -1630,8 +1631,8 @@ func TestOutOfOrderFlood(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -1651,8 +1652,8 @@ func TestOutOfOrderFlood(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -1671,8 +1672,8 @@ func TestOutOfOrderFlood(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(793),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(793),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -1713,8 +1714,8 @@ func TestRstOnCloseWithUnreadData(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+len(data))),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -1728,7 +1729,7 @@ func TestRstOnCloseWithUnreadData(t *testing.T) {
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst),
// We shouldn't consume a sequence number on RST.
- checker.SeqNum(uint32(c.IRS)+1),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
))
// The RST puts the endpoint into an error state.
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateError; got != want {
@@ -1782,8 +1783,8 @@ func TestRstOnCloseWithUnreadDataFinConvertRst(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+len(data))),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -1796,7 +1797,7 @@ func TestRstOnCloseWithUnreadDataFinConvertRst(t *testing.T) {
checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
- checker.SeqNum(uint32(c.IRS)+1),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
))
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateFinWait1; got != want {
@@ -1815,7 +1816,7 @@ func TestRstOnCloseWithUnreadDataFinConvertRst(t *testing.T) {
// RST is always generated with sndNxt which if the FIN
// has been sent will be 1 higher than the sequence
// number of the FIN itself.
- checker.SeqNum(uint32(c.IRS)+2),
+ checker.TCPSeqNum(uint32(c.IRS)+2),
))
// The RST puts the endpoint into an error state.
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateError; got != want {
@@ -1861,7 +1862,8 @@ func TestFullWindowReceive(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
- c.CreateConnected(789, 30000, 10)
+ const rcvBufSz = 10
+ c.CreateConnected(789, 30000, rcvBufSz)
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
@@ -1872,8 +1874,13 @@ func TestFullWindowReceive(t *testing.T) {
t.Fatalf("Read failed: %s", err)
}
- // Fill up the window.
- data := []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
+ // Fill up the window w/ tcp.SegOverheadFactor*rcvBufSz as netstack multiplies
+ // the provided buffer value by tcp.SegOverheadFactor to calculate the actual
+ // receive buffer size.
+ data := make([]byte, tcp.SegOverheadFactor*rcvBufSz)
+ for i := range data {
+ data[i] = byte(i % 255)
+ }
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
@@ -1894,10 +1901,10 @@ func TestFullWindowReceive(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+len(data))),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
- checker.Window(0),
+ checker.TCPWindow(0),
),
)
@@ -1920,10 +1927,10 @@ func TestFullWindowReceive(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+len(data))),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
- checker.Window(10),
+ checker.TCPWindow(10),
),
)
}
@@ -1932,12 +1939,15 @@ func TestNoWindowShrinking(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
- // Start off with a window size of 10, then shrink it to 5.
- c.CreateConnected(789, 30000, 10)
-
- if err := c.EP.SetSockOptInt(tcpip.ReceiveBufferSizeOption, 5); err != nil {
- t.Fatalf("SetSockOptInt(ReceiveBufferSizeOption, 5) failed: %s", err)
- }
+ // Start off with a certain receive buffer then cut it in half and verify that
+ // the right edge of the window does not shrink.
+ // NOTE: Netstack doubles the value specified here.
+ rcvBufSize := 65536
+ iss := seqnum.Value(789)
+ // Enable window scaling with a scale of zero from our end.
+ c.CreateConnectedWithRawOptions(iss, 30000, rcvBufSize, []byte{
+ header.TCPOptionWS, 3, 0, header.TCPOptionNOP,
+ })
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
@@ -1946,14 +1956,15 @@ func TestNoWindowShrinking(t *testing.T) {
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %s, want = %s", err, tcpip.ErrWouldBlock)
}
-
- // Send 3 bytes, check that the peer acknowledges them.
- data := []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- c.SendPacket(data[:3], &context.Headers{
+ // Send a 1 byte payload so that we can record the current receive window.
+ // Send a payload of half the size of rcvBufSize.
+ seqNum := iss.Add(1)
+ payload := []byte{1}
+ c.SendPacket(payload, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
- SeqNum: 790,
+ SeqNum: seqNum,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
@@ -1965,46 +1976,93 @@ func TestNoWindowShrinking(t *testing.T) {
t.Fatalf("Timed out waiting for data to arrive")
}
- // Check that data is acknowledged, and that window doesn't go to zero
- // just yet because it was previously set to 10. It must go to 7 now.
- checker.IPv4(t, c.GetPacket(),
+ // Read the 1 byte payload we just sent.
+ v, _, err := c.EP.Read(nil)
+ if err != nil {
+ t.Fatalf("Read failed: %s", err)
+ }
+ if got, want := payload, v; !bytes.Equal(got, want) {
+ t.Fatalf("got data: %v, want: %v", got, want)
+ }
+
+ seqNum = seqNum.Add(1)
+ // Verify that the ACK does not shrink the window.
+ pkt := c.GetPacket()
+ checker.IPv4(t, pkt,
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(793),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(seqNum)),
checker.TCPFlags(header.TCPFlagAck),
- checker.Window(7),
),
)
+ // Stash the initial window.
+ initialWnd := header.TCP(header.IPv4(pkt).Payload()).WindowSize() << c.RcvdWindowScale
+ initialLastAcceptableSeq := seqNum.Add(seqnum.Size(initialWnd))
+ // Now shrink the receive buffer to half its original size.
+ if err := c.EP.SetSockOptInt(tcpip.ReceiveBufferSizeOption, rcvBufSize/2); err != nil {
+ t.Fatalf("SetSockOptInt(ReceiveBufferSizeOption, 5) failed: %s", err)
+ }
- // Send 7 more bytes, check that the window fills up.
- c.SendPacket(data[3:], &context.Headers{
+ data := generateRandomPayload(t, rcvBufSize)
+ // Send a payload of half the size of rcvBufSize.
+ c.SendPacket(data[:rcvBufSize/2], &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
- SeqNum: 793,
+ SeqNum: seqNum,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
+ seqNum = seqNum.Add(seqnum.Size(rcvBufSize / 2))
- select {
- case <-ch:
- case <-time.After(5 * time.Second):
- t.Fatalf("Timed out waiting for data to arrive")
+ // Verify that the ACK does not shrink the window.
+ pkt = c.GetPacket()
+ checker.IPv4(t, pkt,
+ checker.TCP(
+ checker.DstPort(context.TestPort),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(seqNum)),
+ checker.TCPFlags(header.TCPFlagAck),
+ ),
+ )
+ newWnd := header.TCP(header.IPv4(pkt).Payload()).WindowSize() << c.RcvdWindowScale
+ newLastAcceptableSeq := seqNum.Add(seqnum.Size(newWnd))
+ if newLastAcceptableSeq.LessThan(initialLastAcceptableSeq) {
+ t.Fatalf("receive window shrunk unexpectedly got: %d, want >= %d", newLastAcceptableSeq, initialLastAcceptableSeq)
}
+ // Send another payload of half the size of rcvBufSize. This should fill up the
+ // socket receive buffer and we should see a zero window.
+ c.SendPacket(data[rcvBufSize/2:], &context.Headers{
+ SrcPort: context.TestPort,
+ DstPort: c.Port,
+ Flags: header.TCPFlagAck,
+ SeqNum: seqNum,
+ AckNum: c.IRS.Add(1),
+ RcvWnd: 30000,
+ })
+ seqNum = seqNum.Add(seqnum.Size(rcvBufSize / 2))
+
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+len(data))),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(seqNum)),
checker.TCPFlags(header.TCPFlagAck),
- checker.Window(0),
+ checker.TCPWindow(0),
),
)
+ // Wait for receive to be notified.
+ select {
+ case <-ch:
+ case <-time.After(5 * time.Second):
+ t.Fatalf("Timed out waiting for data to arrive")
+ }
+
// Receive data and check it.
- read := make([]byte, 0, 10)
+ read := make([]byte, 0, rcvBufSize)
for len(read) < len(data) {
v, _, err := c.EP.Read(nil)
if err != nil {
@@ -2018,15 +2076,15 @@ func TestNoWindowShrinking(t *testing.T) {
t.Fatalf("got data = %v, want = %v", read, data)
}
- // Check that we get an ACK for the newly non-zero window, which is the
- // new size.
+ // Check that we get an ACK for the newly non-zero window, which is the new
+ // receive buffer size we set after the connection was established.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+len(data))),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(seqNum)),
checker.TCPFlags(header.TCPFlagAck),
- checker.Window(5),
+ checker.TCPWindow(uint16(rcvBufSize/2)>>c.RcvdWindowScale),
),
)
}
@@ -2051,8 +2109,8 @@ func TestSimpleSend(t *testing.T) {
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2093,8 +2151,8 @@ func TestZeroWindowSend(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2115,8 +2173,8 @@ func TestZeroWindowSend(t *testing.T) {
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2155,16 +2213,16 @@ func TestScaledWindowConnect(t *testing.T) {
t.Fatalf("Write failed: %s", err)
}
- // Check that data is received, and that advertised window is 0xbfff,
+ // Check that data is received, and that advertised window is 0x5fff,
// that is, that it is scaled.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
- checker.Window(0xbfff),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
+ checker.TCPWindow(0x5fff),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2194,9 +2252,9 @@ func TestNonScaledWindowConnect(t *testing.T) {
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
- checker.Window(0xffff),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
+ checker.TCPWindow(0xffff),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2230,7 +2288,8 @@ func TestScaledWindowAccept(t *testing.T) {
}
// Do 3-way handshake.
- c.PassiveConnectWithOptions(100, 2, header.TCPSynOptions{MSS: defaultIPv4MSS})
+ // wndScale expected is 3 as 65535 * 3 * 2 < 65535 * 2^3 but > 65535 *2 *2
+ c.PassiveConnectWithOptions(100, 3 /* wndScale */, header.TCPSynOptions{MSS: defaultIPv4MSS})
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
@@ -2260,16 +2319,16 @@ func TestScaledWindowAccept(t *testing.T) {
t.Fatalf("Write failed: %s", err)
}
- // Check that data is received, and that advertised window is 0xbfff,
+ // Check that data is received, and that advertised window is 0x5fff,
// that is, that it is scaled.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
- checker.Window(0xbfff),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
+ checker.TCPWindow(0x5fff),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2341,9 +2400,9 @@ func TestNonScaledWindowAccept(t *testing.T) {
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
- checker.Window(0xffff),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
+ checker.TCPWindow(0xffff),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2356,18 +2415,19 @@ func TestZeroScaledWindowReceive(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
- // Set the window size such that a window scale of 4 will be used.
- const wnd = 65535 * 10
- const ws = uint32(4)
- c.CreateConnectedWithRawOptions(789, 30000, wnd, []byte{
+ // Set the buffer size such that a window scale of 5 will be used.
+ const bufSz = 65535 * 10
+ const ws = uint32(5)
+ c.CreateConnectedWithRawOptions(789, 30000, bufSz, []byte{
header.TCPOptionWS, 3, 0, header.TCPOptionNOP,
})
// Write chunks of 50000 bytes.
- remain := wnd
+ remain := 0
sent := 0
data := make([]byte, 50000)
- for remain > len(data) {
+ // Keep writing till the window drops below len(data).
+ for {
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
@@ -2377,21 +2437,25 @@ func TestZeroScaledWindowReceive(t *testing.T) {
RcvWnd: 30000,
})
sent += len(data)
- remain -= len(data)
- checker.IPv4(t, c.GetPacket(),
+ pkt := c.GetPacket()
+ checker.IPv4(t, pkt,
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+sent)),
- checker.Window(uint16(remain>>ws)),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+sent)),
checker.TCPFlags(header.TCPFlagAck),
),
)
+ // Don't reduce window to zero here.
+ if wnd := int(header.TCP(header.IPv4(pkt).Payload()).WindowSize()); wnd<<ws < len(data) {
+ remain = wnd << ws
+ break
+ }
}
// Make the window non-zero, but the scaled window zero.
- if remain >= 16 {
+ for remain >= 16 {
data = data[:remain-15]
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
@@ -2402,22 +2466,35 @@ func TestZeroScaledWindowReceive(t *testing.T) {
RcvWnd: 30000,
})
sent += len(data)
- remain -= len(data)
- checker.IPv4(t, c.GetPacket(),
+ pkt := c.GetPacket()
+ checker.IPv4(t, pkt,
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+sent)),
- checker.Window(0),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+sent)),
checker.TCPFlags(header.TCPFlagAck),
),
)
+ // Since the receive buffer is split between window advertisement and
+ // application data buffer the window does not always reflect the space
+ // available and actual space available can be a bit more than what is
+ // advertised in the window.
+ wnd := int(header.TCP(header.IPv4(pkt).Payload()).WindowSize())
+ if wnd == 0 {
+ break
+ }
+ remain = wnd << ws
}
- // Read at least 1MSS of data. An ack should be sent in response to that.
+ // Read at least 2MSS of data. An ack should be sent in response to that.
+ // Since buffer space is now split in half between window and application
+ // data we need to read more than 1 MSS(65536) of data for a non-zero window
+ // update to be sent. For 1MSS worth of window to be available we need to
+ // read at least 128KB. Since our segments above were 50KB each it means
+ // we need to read at 3 packets.
sz := 0
- for sz < defaultMTU {
+ for sz < defaultMTU*2 {
v, _, err := c.EP.Read(nil)
if err != nil {
t.Fatalf("Read failed: %s", err)
@@ -2429,9 +2506,9 @@ func TestZeroScaledWindowReceive(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+sent)),
- checker.Window(uint16(sz>>ws)),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+sent)),
+ checker.TCPWindowGreaterThanEq(uint16(defaultMTU>>ws)),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -2498,8 +2575,8 @@ func TestSegmentMerging(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize+1),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+uint32(i)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+uint32(i)+1),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2521,8 +2598,8 @@ func TestSegmentMerging(t *testing.T) {
checker.PayloadLen(len(allData)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+11),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+11),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2569,8 +2646,8 @@ func TestDelay(t *testing.T) {
checker.PayloadLen(len(want)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(seq)),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(seq)),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2616,8 +2693,8 @@ func TestUndelay(t *testing.T) {
checker.PayloadLen(len(allData[0])+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(seq)),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(seq)),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2639,8 +2716,8 @@ func TestUndelay(t *testing.T) {
checker.PayloadLen(len(allData[1])+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(seq)),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(seq)),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2701,8 +2778,8 @@ func TestMSSNotDelayed(t *testing.T) {
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(seq)),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(seq)),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2753,8 +2830,8 @@ func testBrokenUpWrite(t *testing.T, c *context.Context, maxPayload int) {
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1+uint32(bytesReceived)),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1+uint32(bytesReceived)),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -2996,7 +3073,7 @@ func TestSynOptionsOnActiveConnect(t *testing.T) {
// Set the buffer size to a deterministic size so that we can check the
// window scaling option.
const rcvBufferSize = 0x20000
- const wndScale = 2
+ const wndScale = 3
if err := c.EP.SetSockOptInt(tcpip.ReceiveBufferSizeOption, rcvBufferSize); err != nil {
t.Fatalf("SetSockOptInt(ReceiveBufferSizeOption, %d) failed failed: %s", rcvBufferSize, err)
}
@@ -3031,7 +3108,7 @@ func TestSynOptionsOnActiveConnect(t *testing.T) {
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagSyn),
checker.SrcPort(tcpHdr.SourcePort()),
- checker.SeqNum(tcpHdr.SequenceNumber()),
+ checker.TCPSeqNum(tcpHdr.SequenceNumber()),
checker.TCPSynOptions(header.TCPSynOptions{MSS: mss, WS: wndScale}),
),
)
@@ -3052,8 +3129,8 @@ func TestSynOptionsOnActiveConnect(t *testing.T) {
checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(iss)+1),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(iss)+1),
),
)
@@ -3346,8 +3423,8 @@ func TestFinImmediately(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -3367,8 +3444,8 @@ func TestFinImmediately(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+2),
- checker.AckNum(791),
+ checker.TCPSeqNum(uint32(c.IRS)+2),
+ checker.TCPAckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -3389,8 +3466,8 @@ func TestFinRetransmit(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -3400,8 +3477,8 @@ func TestFinRetransmit(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -3421,8 +3498,8 @@ func TestFinRetransmit(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+2),
- checker.AckNum(791),
+ checker.TCPSeqNum(uint32(c.IRS)+2),
+ checker.TCPAckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -3445,8 +3522,8 @@ func TestFinWithNoPendingData(t *testing.T) {
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -3470,8 +3547,8 @@ func TestFinWithNoPendingData(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -3492,8 +3569,8 @@ func TestFinWithNoPendingData(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(791),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -3520,8 +3597,8 @@ func TestFinWithPendingDataCwndFull(t *testing.T) {
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -3539,8 +3616,8 @@ func TestFinWithPendingDataCwndFull(t *testing.T) {
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -3559,8 +3636,8 @@ func TestFinWithPendingDataCwndFull(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -3580,8 +3657,8 @@ func TestFinWithPendingDataCwndFull(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(791),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -3604,8 +3681,8 @@ func TestFinWithPendingData(t *testing.T) {
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -3629,8 +3706,8 @@ func TestFinWithPendingData(t *testing.T) {
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -3645,8 +3722,8 @@ func TestFinWithPendingData(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -3666,8 +3743,8 @@ func TestFinWithPendingData(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(791),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -3691,8 +3768,8 @@ func TestFinWithPartialAck(t *testing.T) {
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -3712,8 +3789,8 @@ func TestFinWithPartialAck(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(791),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(791),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -3727,8 +3804,8 @@ func TestFinWithPartialAck(t *testing.T) {
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(791),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(791),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -3743,8 +3820,8 @@ func TestFinWithPartialAck(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(791),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(791),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -3835,8 +3912,8 @@ func scaledSendWindow(t *testing.T, scale uint8) {
checker.PayloadLen((1<<scale)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -3974,7 +4051,7 @@ func TestReceivedSegmentQueuing(t *testing.T) {
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -4025,8 +4102,8 @@ func TestReadAfterClosedState(t *testing.T) {
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
@@ -4050,8 +4127,8 @@ func TestReadAfterClosedState(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+2),
- checker.AckNum(uint32(791+len(data))),
+ checker.TCPSeqNum(uint32(c.IRS)+2),
+ checker.TCPAckNum(uint32(791+len(data))),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -4223,8 +4300,8 @@ func checkSendBufferSize(t *testing.T, ep tcpip.Endpoint, v int) {
func TestDefaultBufferSizes(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
})
// Check the default values.
@@ -4286,8 +4363,8 @@ func TestDefaultBufferSizes(t *testing.T) {
func TestMinMaxBufferSizes(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
})
// Check the default values.
@@ -4312,14 +4389,14 @@ func TestMinMaxBufferSizes(t *testing.T) {
}
}
- // Set values below the min.
- if err := ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, 199); err != nil {
+ // Set values below the min/2.
+ if err := ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, 99); err != nil {
t.Fatalf("SetSockOptInt(ReceiveBufferSizeOption, 199) failed: %s", err)
}
checkRecvBufferSize(t, ep, 200)
- if err := ep.SetSockOptInt(tcpip.SendBufferSizeOption, 299); err != nil {
+ if err := ep.SetSockOptInt(tcpip.SendBufferSizeOption, 149); err != nil {
t.Fatalf("SetSockOptInt(SendBufferSizeOption, 299) failed: %s", err)
}
@@ -4330,19 +4407,21 @@ func TestMinMaxBufferSizes(t *testing.T) {
t.Fatalf("SetSockOptInt(ReceiveBufferSizeOption) failed: %s", err)
}
- checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*20)
+ // Values above max are capped at max and then doubled.
+ checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*20*2)
if err := ep.SetSockOptInt(tcpip.SendBufferSizeOption, 1+tcp.DefaultSendBufferSize*30); err != nil {
t.Fatalf("SetSockOptInt(SendBufferSizeOption) failed: %s", err)
}
- checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*30)
+ // Values above max are capped at max and then doubled.
+ checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*30*2)
}
func TestBindToDeviceOption(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()}})
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol}})
ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
@@ -4391,11 +4470,11 @@ func TestBindToDeviceOption(t *testing.T) {
func makeStack() (*stack.Stack, *tcpip.Error) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{
- ipv4.NewProtocol(),
- ipv6.NewProtocol(),
+ NetworkProtocols: []stack.NetworkProtocolFactory{
+ ipv4.NewProtocol,
+ ipv6.NewProtocol,
},
- TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+ TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
})
id := loopback.New()
@@ -4678,8 +4757,8 @@ func TestPathMTUDiscovery(t *testing.T) {
checker.PayloadLen(size+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(seqNum),
- checker.AckNum(790),
+ checker.TCPSeqNum(seqNum),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -4930,8 +5009,8 @@ func TestKeepalive(t *testing.T) {
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)),
- checker.AckNum(uint32(790)),
+ checker.TCPSeqNum(uint32(c.IRS)),
+ checker.TCPAckNum(uint32(790)),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -4964,8 +5043,8 @@ func TestKeepalive(t *testing.T) {
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -4976,8 +5055,8 @@ func TestKeepalive(t *testing.T) {
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagPsh),
),
)
@@ -5002,8 +5081,8 @@ func TestKeepalive(t *testing.T) {
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(next-1)),
- checker.AckNum(uint32(790)),
+ checker.TCPSeqNum(uint32(next-1)),
+ checker.TCPAckNum(uint32(790)),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -5029,8 +5108,8 @@ func TestKeepalive(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(next)),
- checker.AckNum(uint32(0)),
+ checker.TCPSeqNum(uint32(next)),
+ checker.TCPAckNum(uint32(0)),
checker.TCPFlags(header.TCPFlagRst),
),
)
@@ -5070,7 +5149,7 @@ func executeHandshake(t *testing.T, c *context.Context, srcPort uint16, synCooki
checker.SrcPort(context.StackPort),
checker.DstPort(srcPort),
checker.TCPFlags(header.TCPFlagAck | header.TCPFlagSyn),
- checker.AckNum(uint32(irs) + 1),
+ checker.TCPAckNum(uint32(irs) + 1),
}
if synCookieInUse {
@@ -5114,7 +5193,7 @@ func executeV6Handshake(t *testing.T, c *context.Context, srcPort uint16, synCoo
checker.SrcPort(context.StackPort),
checker.DstPort(srcPort),
checker.TCPFlags(header.TCPFlagAck | header.TCPFlagSyn),
- checker.AckNum(uint32(irs) + 1),
+ checker.TCPAckNum(uint32(irs) + 1),
}
if synCookieInUse {
@@ -5348,7 +5427,7 @@ func TestListenNoAcceptNonUnicastV4(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagSyn),
- checker.AckNum(uint32(irs)+1)))
+ checker.TCPAckNum(uint32(irs)+1)))
})
}
}
@@ -5356,8 +5435,8 @@ func TestListenNoAcceptNonUnicastV4(t *testing.T) {
// TestListenNoAcceptMulticastBroadcastV6 makes sure that TCP segments with a
// non unicast IPv6 address are not accepted.
func TestListenNoAcceptNonUnicastV6(t *testing.T) {
- multicastAddr := tcpip.Address("\xff\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01")
- otherMulticastAddr := tcpip.Address("\xff\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02")
+ multicastAddr := tcpip.Address("\xff\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01")
+ otherMulticastAddr := tcpip.Address("\xff\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02")
tests := []struct {
name string
@@ -5448,7 +5527,7 @@ func TestListenNoAcceptNonUnicastV6(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagSyn),
- checker.AckNum(uint32(irs)+1)))
+ checker.TCPAckNum(uint32(irs)+1)))
})
}
}
@@ -5496,7 +5575,7 @@ func TestListenSynRcvdQueueFull(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck | header.TCPFlagSyn),
- checker.AckNum(uint32(irs) + 1),
+ checker.TCPAckNum(uint32(irs) + 1),
}
checker.IPv4(t, b, checker.TCP(tcpCheckers...))
@@ -5674,7 +5753,7 @@ func TestSynRcvdBadSeqNumber(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck | header.TCPFlagSyn),
- checker.AckNum(uint32(irs) + 1),
+ checker.TCPAckNum(uint32(irs) + 1),
}
checker.IPv4(t, b, checker.TCP(tcpCheckers...))
@@ -5695,8 +5774,8 @@ func TestSynRcvdBadSeqNumber(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.AckNum(uint32(irs) + 1),
- checker.SeqNum(uint32(iss + 1)),
+ checker.TCPAckNum(uint32(irs) + 1),
+ checker.TCPSeqNum(uint32(iss + 1)),
}
checker.IPv4(t, b, checker.TCP(tcpCheckers...))
@@ -5994,16 +6073,14 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
time.Sleep(latency)
rawEP.SendPacketWithTS([]byte{1}, tsVal)
- // Verify that the ACK has the expected window.
- wantRcvWnd := receiveBufferSize
- wantRcvWnd = (wantRcvWnd >> uint32(c.WindowScale))
- rawEP.VerifyACKRcvWnd(uint16(wantRcvWnd - 1))
+ pkt := rawEP.VerifyAndReturnACKWithTS(tsVal)
+ rcvWnd := header.TCP(header.IPv4(pkt).Payload()).WindowSize()
time.Sleep(25 * time.Millisecond)
// Allocate a large enough payload for the test.
- b := make([]byte, int(receiveBufferSize)*2)
- offset := 0
- payloadSize := receiveBufferSize - 1
+ payloadSize := receiveBufferSize * 2
+ b := make([]byte, int(payloadSize))
+
worker := (c.EP).(interface {
StopWork()
ResumeWork()
@@ -6012,11 +6089,15 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
// Stop the worker goroutine.
worker.StopWork()
- start := offset
- end := offset + payloadSize
+ start := 0
+ end := payloadSize / 2
packetsSent := 0
for ; start < end; start += mss {
- rawEP.SendPacketWithTS(b[start:start+mss], tsVal)
+ packetEnd := start + mss
+ if start+mss > end {
+ packetEnd = end
+ }
+ rawEP.SendPacketWithTS(b[start:packetEnd], tsVal)
packetsSent++
}
@@ -6024,29 +6105,20 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
// are waiting to be read.
worker.ResumeWork()
- // Since we read no bytes the window should goto zero till the
- // application reads some of the data.
- // Discard all intermediate acks except the last one.
- if packetsSent > 100 {
- for i := 0; i < (packetsSent / 100); i++ {
- _ = c.GetPacket()
- }
+ // Since we sent almost the full receive buffer worth of data (some may have
+ // been dropped due to segment overheads), we should get a zero window back.
+ pkt = c.GetPacket()
+ tcpHdr := header.TCP(header.IPv4(pkt).Payload())
+ gotRcvWnd := tcpHdr.WindowSize()
+ wantAckNum := tcpHdr.AckNumber()
+ if got, want := int(gotRcvWnd), 0; got != want {
+ t.Fatalf("got rcvWnd: %d, want: %d", got, want)
}
- rawEP.VerifyACKRcvWnd(0)
time.Sleep(25 * time.Millisecond)
- // Verify that sending more data when window is closed is dropped and
- // not acked.
+ // Verify that sending more data when receiveBuffer is exhausted.
rawEP.SendPacketWithTS(b[start:start+mss], tsVal)
- // Verify that the stack sends us back an ACK with the sequence number
- // of the last packet sent indicating it was dropped.
- p := c.GetPacket()
- checker.IPv4(t, p, checker.TCP(
- checker.AckNum(uint32(rawEP.NextSeqNum)-uint32(mss)),
- checker.Window(0),
- ))
-
// Now read all the data from the endpoint and verify that advertised
// window increases to the full available buffer size.
for {
@@ -6059,23 +6131,26 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
// Verify that we receive a non-zero window update ACK. When running
// under thread santizer this test can end up sending more than 1
// ack, 1 for the non-zero window
- p = c.GetPacket()
+ p := c.GetPacket()
checker.IPv4(t, p, checker.TCP(
- checker.AckNum(uint32(rawEP.NextSeqNum)-uint32(mss)),
+ checker.TCPAckNum(uint32(wantAckNum)),
func(t *testing.T, h header.Transport) {
tcp, ok := h.(header.TCP)
if !ok {
return
}
- if w := tcp.WindowSize(); w == 0 || w > uint16(wantRcvWnd) {
- t.Errorf("expected a non-zero window: got %d, want <= wantRcvWnd", w)
+ // We use 10% here as the error margin upwards as the initial window we
+ // got was afer 1 segment was already in the receive buffer queue.
+ tolerance := 1.1
+ if w := tcp.WindowSize(); w == 0 || w > uint16(float64(rcvWnd)*tolerance) {
+ t.Errorf("expected a non-zero window: got %d, want <= %d", w, uint16(float64(rcvWnd)*tolerance))
}
},
))
}
-// This test verifies that the auto tuning does not grow the receive buffer if
-// the application is not reading the data actively.
+// This test verifies that the advertised window is auto-tuned up as the
+// application is reading the data that is being received.
func TestReceiveBufferAutoTuning(t *testing.T) {
const mtu = 1500
const mss = mtu - header.IPv4MinimumSize - header.TCPMinimumSize
@@ -6085,9 +6160,6 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
// Enable Auto-tuning.
stk := c.Stack()
- // Set lower limits for auto-tuning tests. This is required because the
- // test stops the worker which can cause packets to be dropped because
- // the segment queue holding unprocessed packets is limited to 300.
const receiveBufferSize = 80 << 10 // 80KB.
const maxReceiveBufferSize = receiveBufferSize * 10
{
@@ -6109,8 +6181,12 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize))
rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4})
-
- wantRcvWnd := receiveBufferSize
+ tsVal := uint32(rawEP.TSVal)
+ rawEP.NextSeqNum--
+ rawEP.SendPacketWithTS(nil, tsVal)
+ rawEP.NextSeqNum++
+ pkt := rawEP.VerifyAndReturnACKWithTS(tsVal)
+ curRcvWnd := int(header.TCP(header.IPv4(pkt).Payload()).WindowSize()) << c.WindowScale
scaleRcvWnd := func(rcvWnd int) uint16 {
return uint16(rcvWnd >> uint16(c.WindowScale))
}
@@ -6127,14 +6203,8 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
StopWork()
ResumeWork()
})
- tsVal := rawEP.TSVal
- // We are going to do our own computation of what the moderated receive
- // buffer should be based on sent/copied data per RTT and verify that
- // the advertised window by the stack matches our calculations.
- prevCopied := 0
- done := false
latency := 1 * time.Millisecond
- for i := 0; !done; i++ {
+ for i := 0; i < 5; i++ {
tsVal++
// Stop the worker goroutine.
@@ -6156,15 +6226,20 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
// Give 1ms for the worker to process the packets.
time.Sleep(1 * time.Millisecond)
- // Verify that the advertised window on the ACK is reduced by
- // the total bytes sent.
- expectedWnd := wantRcvWnd - totalSent
- if packetsSent > 100 {
- for i := 0; i < (packetsSent / 100); i++ {
- _ = c.GetPacket()
+ lastACK := c.GetPacket()
+ // Discard any intermediate ACKs and only check the last ACK we get in a
+ // short time period of few ms.
+ for {
+ time.Sleep(1 * time.Millisecond)
+ pkt := c.GetPacketNonBlocking()
+ if pkt == nil {
+ break
}
+ lastACK = pkt
+ }
+ if got, want := int(header.TCP(header.IPv4(lastACK).Payload()).WindowSize()), int(scaleRcvWnd(curRcvWnd)); got > want {
+ t.Fatalf("advertised window got: %d, want <= %d", got, want)
}
- rawEP.VerifyACKRcvWnd(scaleRcvWnd(expectedWnd))
// Now read all the data from the endpoint and invoke the
// moderation API to allow for receive buffer auto-tuning
@@ -6189,35 +6264,20 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
rawEP.NextSeqNum--
rawEP.SendPacketWithTS(nil, tsVal)
rawEP.NextSeqNum++
-
if i == 0 {
// In the first iteration the receiver based RTT is not
// yet known as a result the moderation code should not
// increase the advertised window.
- rawEP.VerifyACKRcvWnd(scaleRcvWnd(wantRcvWnd))
- prevCopied = totalCopied
+ rawEP.VerifyACKRcvWnd(scaleRcvWnd(curRcvWnd))
} else {
- rttCopied := totalCopied
- if i == 1 {
- // The moderation code accumulates copied bytes till
- // RTT is established. So add in the bytes sent in
- // the first iteration to the total bytes for this
- // RTT.
- rttCopied += prevCopied
- // Now reset it to the initial value used by the
- // auto tuning logic.
- prevCopied = tcp.InitialCwnd * mss * 2
- }
- newWnd := rttCopied<<1 + 16*mss
- grow := (newWnd * (rttCopied - prevCopied)) / prevCopied
- newWnd += (grow << 1)
- if newWnd > maxReceiveBufferSize {
- newWnd = maxReceiveBufferSize
- done = true
+ pkt := c.GetPacket()
+ curRcvWnd = int(header.TCP(header.IPv4(pkt).Payload()).WindowSize()) << c.WindowScale
+ // If thew new current window is close maxReceiveBufferSize then terminate
+ // the loop. This can happen before all iterations are done due to timing
+ // differences when running the test.
+ if int(float64(curRcvWnd)*1.1) > maxReceiveBufferSize/2 {
+ break
}
- rawEP.VerifyACKRcvWnd(scaleRcvWnd(newWnd))
- wantRcvWnd = newWnd
- prevCopied = rttCopied
// Increase the latency after first two iterations to
// establish a low RTT value in the receiver since it
// only tracks the lowest value. This ensures that when
@@ -6230,6 +6290,12 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
offset += payloadSize
payloadSize *= 2
}
+ // Check that at the end of our iterations the receive window grew close to the maximum
+ // permissible size of maxReceiveBufferSize/2
+ if got, want := int(float64(curRcvWnd)*1.1), maxReceiveBufferSize/2; got < want {
+ t.Fatalf("unexpected rcvWnd got: %d, want > %d", got, want)
+ }
+
}
func TestDelayEnabled(t *testing.T) {
@@ -6381,8 +6447,8 @@ func TestTCPTimeWaitRSTIgnored(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+1)),
- checker.AckNum(uint32(iss)+1),
+ checker.TCPSeqNum(uint32(c.IRS+1)),
+ checker.TCPAckNum(uint32(iss)+1),
checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck)))
finHeaders := &context.Headers{
@@ -6399,8 +6465,8 @@ func TestTCPTimeWaitRSTIgnored(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+2)),
- checker.AckNum(uint32(iss)+2),
+ checker.TCPSeqNum(uint32(c.IRS+2)),
+ checker.TCPAckNum(uint32(iss)+2),
checker.TCPFlags(header.TCPFlagAck)))
// Now send a RST and this should be ignored and not
@@ -6428,8 +6494,8 @@ func TestTCPTimeWaitRSTIgnored(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+2)),
- checker.AckNum(uint32(iss)+2),
+ checker.TCPSeqNum(uint32(c.IRS+2)),
+ checker.TCPAckNum(uint32(iss)+2),
checker.TCPFlags(header.TCPFlagAck)))
}
@@ -6500,8 +6566,8 @@ func TestTCPTimeWaitOutOfOrder(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+1)),
- checker.AckNum(uint32(iss)+1),
+ checker.TCPSeqNum(uint32(c.IRS+1)),
+ checker.TCPAckNum(uint32(iss)+1),
checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck)))
finHeaders := &context.Headers{
@@ -6518,8 +6584,8 @@ func TestTCPTimeWaitOutOfOrder(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+2)),
- checker.AckNum(uint32(iss)+2),
+ checker.TCPSeqNum(uint32(c.IRS+2)),
+ checker.TCPAckNum(uint32(iss)+2),
checker.TCPFlags(header.TCPFlagAck)))
// Out of order ACK should generate an immediate ACK in
@@ -6535,8 +6601,8 @@ func TestTCPTimeWaitOutOfOrder(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+2)),
- checker.AckNum(uint32(iss)+2),
+ checker.TCPSeqNum(uint32(c.IRS+2)),
+ checker.TCPAckNum(uint32(iss)+2),
checker.TCPFlags(header.TCPFlagAck)))
}
@@ -6607,8 +6673,8 @@ func TestTCPTimeWaitNewSyn(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+1)),
- checker.AckNum(uint32(iss)+1),
+ checker.TCPSeqNum(uint32(c.IRS+1)),
+ checker.TCPAckNum(uint32(iss)+1),
checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck)))
finHeaders := &context.Headers{
@@ -6625,8 +6691,8 @@ func TestTCPTimeWaitNewSyn(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+2)),
- checker.AckNum(uint32(iss)+2),
+ checker.TCPSeqNum(uint32(c.IRS+2)),
+ checker.TCPAckNum(uint32(iss)+2),
checker.TCPFlags(header.TCPFlagAck)))
// Send a SYN request w/ sequence number lower than
@@ -6764,8 +6830,8 @@ func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+1)),
- checker.AckNum(uint32(iss)+1),
+ checker.TCPSeqNum(uint32(c.IRS+1)),
+ checker.TCPAckNum(uint32(iss)+1),
checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck)))
finHeaders := &context.Headers{
@@ -6782,8 +6848,8 @@ func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+2)),
- checker.AckNum(uint32(iss)+2),
+ checker.TCPSeqNum(uint32(c.IRS+2)),
+ checker.TCPAckNum(uint32(iss)+2),
checker.TCPFlags(header.TCPFlagAck)))
time.Sleep(2 * time.Second)
@@ -6797,8 +6863,8 @@ func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+2)),
- checker.AckNum(uint32(iss)+2),
+ checker.TCPSeqNum(uint32(c.IRS+2)),
+ checker.TCPAckNum(uint32(iss)+2),
checker.TCPFlags(header.TCPFlagAck)))
// Sleep for 4 seconds so at this point we are 1 second past the
@@ -6826,8 +6892,8 @@ func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(ackHeaders.AckNum)),
- checker.AckNum(0),
+ checker.TCPSeqNum(uint32(ackHeaders.AckNum)),
+ checker.TCPAckNum(0),
checker.TCPFlags(header.TCPFlagRst)))
if got := c.Stack().Stats().TCP.EstablishedClosed.Value(); got != want {
@@ -6926,8 +6992,8 @@ func TestTCPCloseWithData(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+1)),
- checker.AckNum(uint32(iss)+2),
+ checker.TCPSeqNum(uint32(c.IRS+1)),
+ checker.TCPAckNum(uint32(iss)+2),
checker.TCPFlags(header.TCPFlagAck)))
// Now write a few bytes and then close the endpoint.
@@ -6945,8 +7011,8 @@ func TestTCPCloseWithData(t *testing.T) {
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(iss)+2), // Acknum is initial sequence number + 1
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(iss)+2), // Acknum is initial sequence number + 1
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -6960,8 +7026,8 @@ func TestTCPCloseWithData(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+1)+uint32(len(data))),
- checker.AckNum(uint32(iss+2)),
+ checker.TCPSeqNum(uint32(c.IRS+1)+uint32(len(data))),
+ checker.TCPAckNum(uint32(iss+2)),
checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck)))
// First send a partial ACK.
@@ -7006,8 +7072,8 @@ func TestTCPCloseWithData(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(ackHeaders.AckNum)),
- checker.AckNum(0),
+ checker.TCPSeqNum(uint32(ackHeaders.AckNum)),
+ checker.TCPAckNum(0),
checker.TCPFlags(header.TCPFlagRst)))
}
@@ -7043,8 +7109,8 @@ func TestTCPUserTimeout(t *testing.T) {
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(next),
- checker.AckNum(790),
+ checker.TCPSeqNum(next),
+ checker.TCPAckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -7078,8 +7144,8 @@ func TestTCPUserTimeout(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(next)),
- checker.AckNum(uint32(0)),
+ checker.TCPSeqNum(uint32(next)),
+ checker.TCPAckNum(uint32(0)),
checker.TCPFlags(header.TCPFlagRst),
),
)
@@ -7140,8 +7206,8 @@ func TestKeepaliveWithUserTimeout(t *testing.T) {
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)),
- checker.AckNum(uint32(790)),
+ checker.TCPSeqNum(uint32(c.IRS)),
+ checker.TCPAckNum(uint32(790)),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -7166,8 +7232,8 @@ func TestKeepaliveWithUserTimeout(t *testing.T) {
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS+1)),
- checker.AckNum(uint32(0)),
+ checker.TCPSeqNum(uint32(c.IRS+1)),
+ checker.TCPAckNum(uint32(0)),
checker.TCPFlags(header.TCPFlagRst),
),
)
@@ -7183,9 +7249,9 @@ func TestKeepaliveWithUserTimeout(t *testing.T) {
}
}
-func TestIncreaseWindowOnReceive(t *testing.T) {
+func TestIncreaseWindowOnRead(t *testing.T) {
// This test ensures that the endpoint sends an ack,
- // after recv() when the window grows to more than 1 MSS.
+ // after read() when the window grows by more than 1 MSS.
c := context.New(t, defaultMTU)
defer c.Cleanup()
@@ -7194,10 +7260,9 @@ func TestIncreaseWindowOnReceive(t *testing.T) {
// Write chunks of ~30000 bytes. It's important that two
// payloads make it equal or longer than MSS.
- remain := rcvBuf
+ remain := rcvBuf * 2
sent := 0
data := make([]byte, defaultMTU/2)
- lastWnd := uint16(0)
for remain > len(data) {
c.SendPacket(data, &context.Headers{
@@ -7210,46 +7275,43 @@ func TestIncreaseWindowOnReceive(t *testing.T) {
})
sent += len(data)
remain -= len(data)
-
- lastWnd = uint16(remain)
- if remain > 0xffff {
- lastWnd = 0xffff
- }
- checker.IPv4(t, c.GetPacket(),
+ pkt := c.GetPacket()
+ checker.IPv4(t, pkt,
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+sent)),
- checker.Window(lastWnd),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+sent)),
checker.TCPFlags(header.TCPFlagAck),
),
)
+ // Break once the window drops below defaultMTU/2
+ if wnd := header.TCP(header.IPv4(pkt).Payload()).WindowSize(); wnd < defaultMTU/2 {
+ break
+ }
}
- if lastWnd == 0xffff || lastWnd == 0 {
- t.Fatalf("expected small, non-zero window: %d", lastWnd)
- }
-
- // We now have < 1 MSS in the buffer space. Read the data! An
- // ack should be sent in response to that. The window was not
- // zero, but it grew to larger than MSS.
- if _, _, err := c.EP.Read(nil); err != nil {
- t.Fatalf("Read failed: %s", err)
- }
-
- if _, _, err := c.EP.Read(nil); err != nil {
- t.Fatalf("Read failed: %s", err)
+ // We now have < 1 MSS in the buffer space. Read at least > 2 MSS
+ // worth of data as receive buffer space
+ read := 0
+ // defaultMTU is a good enough estimate for the MSS used for this
+ // connection.
+ for read < defaultMTU*2 {
+ v, _, err := c.EP.Read(nil)
+ if err != nil {
+ t.Fatalf("Read failed: %s", err)
+ }
+ read += len(v)
}
- // After reading two packets, we surely crossed MSS. See the ack:
+ // After reading > MSS worth of data, we surely crossed MSS. See the ack:
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+sent)),
- checker.Window(uint16(0xffff)),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+sent)),
+ checker.TCPWindow(uint16(0xffff)),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -7266,10 +7328,9 @@ func TestIncreaseWindowOnBufferResize(t *testing.T) {
// Write chunks of ~30000 bytes. It's important that two
// payloads make it equal or longer than MSS.
- remain := rcvBuf
+ remain := rcvBuf * 2
sent := 0
data := make([]byte, defaultMTU/2)
- lastWnd := uint16(0)
for remain > len(data) {
c.SendPacket(data, &context.Headers{
@@ -7283,38 +7344,29 @@ func TestIncreaseWindowOnBufferResize(t *testing.T) {
sent += len(data)
remain -= len(data)
- lastWnd = uint16(remain)
- if remain > 0xffff {
- lastWnd = 0xffff
- }
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+sent)),
- checker.Window(lastWnd),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+sent)),
+ checker.TCPWindowLessThanEq(0xffff),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
- if lastWnd == 0xffff || lastWnd == 0 {
- t.Fatalf("expected small, non-zero window: %d", lastWnd)
- }
-
// Increasing the buffer from should generate an ACK,
// since window grew from small value to larger equal MSS
c.EP.SetSockOptInt(tcpip.ReceiveBufferSizeOption, rcvBuf*2)
- // After reading two packets, we surely crossed MSS. See the ack:
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(790+sent)),
- checker.Window(uint16(0xffff)),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(790+sent)),
+ checker.TCPWindow(uint16(0xffff)),
checker.TCPFlags(header.TCPFlagAck),
),
)
@@ -7359,8 +7411,8 @@ func TestTCPDeferAccept(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(iss+1)),
- checker.AckNum(uint32(irs+5))))
+ checker.TCPSeqNum(uint32(iss+1)),
+ checker.TCPAckNum(uint32(irs+5))))
// Give a bit of time for the socket to be delivered to the accept queue.
time.Sleep(50 * time.Millisecond)
@@ -7374,8 +7426,8 @@ func TestTCPDeferAccept(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck),
- checker.SeqNum(uint32(iss+1)),
- checker.AckNum(uint32(irs+5))))
+ checker.TCPSeqNum(uint32(iss+1)),
+ checker.TCPAckNum(uint32(irs+5))))
}
func TestTCPDeferAcceptTimeout(t *testing.T) {
@@ -7412,7 +7464,7 @@ func TestTCPDeferAcceptTimeout(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagSyn),
- checker.AckNum(uint32(irs)+1)))
+ checker.TCPAckNum(uint32(irs)+1)))
// Send data. This should result in an acceptable endpoint.
c.SendPacket([]byte{1, 2, 3, 4}, &context.Headers{
@@ -7428,8 +7480,8 @@ func TestTCPDeferAcceptTimeout(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(iss+1)),
- checker.AckNum(uint32(irs+5))))
+ checker.TCPSeqNum(uint32(iss+1)),
+ checker.TCPAckNum(uint32(irs+5))))
// Give sometime for the endpoint to be delivered to the accept queue.
time.Sleep(50 * time.Millisecond)
@@ -7444,8 +7496,8 @@ func TestTCPDeferAcceptTimeout(t *testing.T) {
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck),
- checker.SeqNum(uint32(iss+1)),
- checker.AckNum(uint32(irs+5))))
+ checker.TCPSeqNum(uint32(iss+1)),
+ checker.TCPAckNum(uint32(irs+5))))
}
func TestResetDuringClose(t *testing.T) {
@@ -7470,8 +7522,8 @@ func TestResetDuringClose(t *testing.T) {
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(irs.Add(1))),
- checker.AckNum(uint32(iss.Add(5)))))
+ checker.TCPSeqNum(uint32(irs.Add(1))),
+ checker.TCPAckNum(uint32(iss.Add(5)))))
// Close in a separate goroutine so that we can trigger
// a race with the RST we send below. This should not
@@ -7552,3 +7604,14 @@ func TestSetStackTimeWaitReuse(t *testing.T) {
}
}
}
+
+// generateRandomPayload generates a random byte slice of the specified length
+// causing a fatal test failure if it is unable to do so.
+func generateRandomPayload(t *testing.T, n int) []byte {
+ t.Helper()
+ buf := make([]byte, n)
+ if _, err := rand.Read(buf); err != nil {
+ t.Fatalf("rand.Read(buf) failed: %s", err)
+ }
+ return buf
+}
diff --git a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
index 44593ed98..0f9ed06cd 100644
--- a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
@@ -159,9 +159,9 @@ func timeStampEnabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wndS
checker.PayloadLen(len(data)+header.TCPMinimumSize+12),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
- checker.Window(wndSize),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
+ checker.TCPWindow(wndSize),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
checker.TCPTimestampChecker(true, 0, tsVal+1),
),
@@ -181,7 +181,8 @@ func TestTimeStampEnabledAccept(t *testing.T) {
wndSize uint16
}{
{true, -1, 0xffff}, // When cookie is used window scaling is disabled.
- {false, 5, 0x8000}, // DefaultReceiveBufferSize is 1MB >> 5.
+ // DefaultReceiveBufferSize is 1MB >> 5. Advertised window will be 1/2 of that.
+ {false, 5, 0x4000},
}
for _, tc := range testCases {
timeStampEnabledAccept(t, tc.cookieEnabled, tc.wndScale, tc.wndSize)
@@ -219,9 +220,9 @@ func timeStampDisabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wnd
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(790),
- checker.Window(wndSize),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(790),
+ checker.TCPWindow(wndSize),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
checker.TCPTimestampChecker(false, 0, 0),
),
@@ -237,7 +238,9 @@ func TestTimeStampDisabledAccept(t *testing.T) {
wndSize uint16
}{
{true, -1, 0xffff}, // When cookie is used window scaling is disabled.
- {false, 5, 0x8000}, // DefaultReceiveBufferSize is 1MB >> 5.
+ // DefaultReceiveBufferSize is 1MB >> 5. Advertised window will be half of
+ // that.
+ {false, 5, 0x4000},
}
for _, tc := range testCases {
timeStampDisabledAccept(t, tc.cookieEnabled, tc.wndScale, tc.wndSize)
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 059c13821..faf51ef95 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -145,14 +145,18 @@ type Context struct {
// WindowScale is the expected window scale in SYN packets sent by
// the stack.
WindowScale uint8
+
+ // RcvdWindowScale is the actual window scale sent by the stack in
+ // SYN/SYN-ACK.
+ RcvdWindowScale uint8
}
// New allocates and initializes a test context containing a new
// stack and a link-layer endpoint.
func New(t *testing.T, mtu uint32) *Context {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
})
const sendBufferSize = 1 << 20 // 1 MiB
@@ -261,18 +265,17 @@ func (c *Context) CheckNoPacket(errMsg string) {
c.CheckNoPacketTimeout(errMsg, 1*time.Second)
}
-// GetPacket reads a packet from the link layer endpoint and verifies
+// GetPacketWithTimeout reads a packet from the link layer endpoint and verifies
// that it is an IPv4 packet with the expected source and destination
-// addresses. It will fail with an error if no packet is received for
-// 2 seconds.
-func (c *Context) GetPacket() []byte {
+// addresses. If no packet is received in the specified timeout it will return
+// nil.
+func (c *Context) GetPacketWithTimeout(timeout time.Duration) []byte {
c.t.Helper()
- ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+ ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
p, ok := c.linkEP.ReadContext(ctx)
if !ok {
- c.t.Fatalf("Packet wasn't written out")
return nil
}
@@ -299,6 +302,21 @@ func (c *Context) GetPacket() []byte {
return b
}
+// GetPacket reads a packet from the link layer endpoint and verifies
+// that it is an IPv4 packet with the expected source and destination
+// addresses.
+func (c *Context) GetPacket() []byte {
+ c.t.Helper()
+
+ p := c.GetPacketWithTimeout(5 * time.Second)
+ if p == nil {
+ c.t.Fatalf("Packet wasn't written out")
+ return nil
+ }
+
+ return p
+}
+
// GetPacketNonBlocking reads a packet from the link layer endpoint
// and verifies that it is an IPv4 packet with the expected source
// and destination address. If no packet is available it will return
@@ -486,8 +504,8 @@ func (c *Context) ReceiveAndCheckPacketWithOptions(data []byte, offset, size, op
checker.PayloadLen(size+header.TCPMinimumSize+optlen),
checker.TCP(
checker.DstPort(TestPort),
- checker.SeqNum(uint32(c.IRS.Add(seqnum.Size(1+offset)))),
- checker.AckNum(uint32(seqnum.Value(testInitialSequenceNumber).Add(1))),
+ checker.TCPSeqNum(uint32(c.IRS.Add(seqnum.Size(1+offset)))),
+ checker.TCPAckNum(uint32(seqnum.Value(testInitialSequenceNumber).Add(1))),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -513,8 +531,8 @@ func (c *Context) ReceiveNonBlockingAndCheckPacket(data []byte, offset, size int
checker.PayloadLen(size+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(TestPort),
- checker.SeqNum(uint32(c.IRS.Add(seqnum.Size(1+offset)))),
- checker.AckNum(uint32(seqnum.Value(testInitialSequenceNumber).Add(1))),
+ checker.TCPSeqNum(uint32(c.IRS.Add(seqnum.Size(1+offset)))),
+ checker.TCPAckNum(uint32(seqnum.Value(testInitialSequenceNumber).Add(1))),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
@@ -652,6 +670,7 @@ func (c *Context) Connect(iss seqnum.Value, rcvWnd seqnum.Size, options []byte)
}
tcpHdr := header.TCP(header.IPv4(b).Payload())
+ synOpts := header.ParseSynOptions(tcpHdr.Options(), false /* isAck */)
c.IRS = seqnum.Value(tcpHdr.SequenceNumber())
c.SendPacket(nil, &Headers{
@@ -669,8 +688,8 @@ func (c *Context) Connect(iss seqnum.Value, rcvWnd seqnum.Size, options []byte)
checker.TCP(
checker.DstPort(TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(c.IRS)+1),
- checker.AckNum(uint32(iss)+1),
+ checker.TCPSeqNum(uint32(c.IRS)+1),
+ checker.TCPAckNum(uint32(iss)+1),
),
)
@@ -687,6 +706,7 @@ func (c *Context) Connect(iss seqnum.Value, rcvWnd seqnum.Size, options []byte)
c.t.Fatalf("Unexpected endpoint state: want %v, got %v", want, got)
}
+ c.RcvdWindowScale = uint8(synOpts.WS)
c.Port = tcpHdr.SourcePort()
}
@@ -758,17 +778,18 @@ func (r *RawEndpoint) SendPacket(payload []byte, opts []byte) {
r.NextSeqNum = r.NextSeqNum.Add(seqnum.Size(len(payload)))
}
-// VerifyACKWithTS verifies that the tsEcr field in the ack matches the provided
-// tsVal.
-func (r *RawEndpoint) VerifyACKWithTS(tsVal uint32) {
+// VerifyAndReturnACKWithTS verifies that the tsEcr field int he ACK matches
+// the provided tsVal as well as returns the original packet.
+func (r *RawEndpoint) VerifyAndReturnACKWithTS(tsVal uint32) []byte {
+ r.C.t.Helper()
// Read ACK and verify that tsEcr of ACK packet is [1,2,3,4]
ackPacket := r.C.GetPacket()
checker.IPv4(r.C.t, ackPacket,
checker.TCP(
checker.DstPort(r.SrcPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(r.AckNum)),
- checker.AckNum(uint32(r.NextSeqNum)),
+ checker.TCPSeqNum(uint32(r.AckNum)),
+ checker.TCPAckNum(uint32(r.NextSeqNum)),
checker.TCPTimestampChecker(true, 0, tsVal),
),
)
@@ -776,19 +797,28 @@ func (r *RawEndpoint) VerifyACKWithTS(tsVal uint32) {
tcpSeg := header.TCP(header.IPv4(ackPacket).Payload())
opts := tcpSeg.ParsedOptions()
r.RecentTS = opts.TSVal
+ return ackPacket
+}
+
+// VerifyACKWithTS verifies that the tsEcr field in the ack matches the provided
+// tsVal.
+func (r *RawEndpoint) VerifyACKWithTS(tsVal uint32) {
+ r.C.t.Helper()
+ _ = r.VerifyAndReturnACKWithTS(tsVal)
}
// VerifyACKRcvWnd verifies that the window advertised by the incoming ACK
// matches the provided rcvWnd.
func (r *RawEndpoint) VerifyACKRcvWnd(rcvWnd uint16) {
+ r.C.t.Helper()
ackPacket := r.C.GetPacket()
checker.IPv4(r.C.t, ackPacket,
checker.TCP(
checker.DstPort(r.SrcPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(r.AckNum)),
- checker.AckNum(uint32(r.NextSeqNum)),
- checker.Window(rcvWnd),
+ checker.TCPSeqNum(uint32(r.AckNum)),
+ checker.TCPAckNum(uint32(r.NextSeqNum)),
+ checker.TCPWindow(rcvWnd),
),
)
}
@@ -807,8 +837,8 @@ func (r *RawEndpoint) VerifyACKHasSACK(sackBlocks []header.SACKBlock) {
checker.TCP(
checker.DstPort(r.SrcPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(r.AckNum)),
- checker.AckNum(uint32(r.NextSeqNum)),
+ checker.TCPSeqNum(uint32(r.AckNum)),
+ checker.TCPAckNum(uint32(r.NextSeqNum)),
checker.TCPSACKBlockChecker(sackBlocks),
),
)
@@ -900,8 +930,8 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *
tcpCheckers := []checker.TransportChecker{
checker.DstPort(TestPort),
checker.TCPFlags(header.TCPFlagAck),
- checker.SeqNum(uint32(c.IRS) + 1),
- checker.AckNum(uint32(iss) + 1),
+ checker.TCPSeqNum(uint32(c.IRS) + 1),
+ checker.TCPAckNum(uint32(iss) + 1),
}
// Verify that tsEcr of ACK packet is wantOptions.TSVal if the
@@ -936,7 +966,7 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *
// Mark in context that timestamp option is enabled for this endpoint.
c.TimeStampEnabled = true
-
+ c.RcvdWindowScale = uint8(synOptions.WS)
return &RawEndpoint{
C: c,
SrcPort: tcpSeg.DestinationPort(),
@@ -1029,6 +1059,7 @@ func (c *Context) PassiveConnect(maxPayload, wndScale int, synOptions header.TCP
// value of the window scaling option to be sent in the SYN. If synOptions.WS >
// 0 then we send the WindowScale option.
func (c *Context) PassiveConnectWithOptions(maxPayload, wndScale int, synOptions header.TCPSynOptions) *RawEndpoint {
+ c.t.Helper()
opts := make([]byte, header.TCPOptionsMaximumSize)
offset := 0
offset += header.EncodeMSSOption(uint32(maxPayload), opts)
@@ -1067,13 +1098,14 @@ func (c *Context) PassiveConnectWithOptions(maxPayload, wndScale int, synOptions
// are present.
b := c.GetPacket()
tcp := header.TCP(header.IPv4(b).Payload())
+ rcvdSynOptions := header.ParseSynOptions(tcp.Options(), true /* isAck */)
c.IRS = seqnum.Value(tcp.SequenceNumber())
tcpCheckers := []checker.TransportChecker{
checker.SrcPort(StackPort),
checker.DstPort(TestPort),
checker.TCPFlags(header.TCPFlagAck | header.TCPFlagSyn),
- checker.AckNum(uint32(iss) + 1),
+ checker.TCPAckNum(uint32(iss) + 1),
checker.TCPSynOptions(header.TCPSynOptions{MSS: synOptions.MSS, WS: wndScale, SACKPermitted: synOptions.SACKPermitted && c.SACKEnabled()}),
}
@@ -1116,6 +1148,7 @@ func (c *Context) PassiveConnectWithOptions(maxPayload, wndScale int, synOptions
// Send ACK.
c.SendPacket(nil, ackHeaders)
+ c.RcvdWindowScale = uint8(rcvdSynOptions.WS)
c.Port = StackPort
return &RawEndpoint{
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index a1d0f49d9..da5b1deb2 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -12,12 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package udp contains the implementation of the UDP transport protocol. To use
-// it in the networking stack, this package must be added to the project, and
-// activated on the stack by passing udp.NewProtocol() as one of the
-// transport protocols when calling stack.New(). Then endpoints can be created
-// by passing udp.ProtocolNumber as the transport protocol number when calling
-// Stack.NewEndpoint().
+// Package udp contains the implementation of the UDP transport protocol.
package udp
import (
@@ -50,6 +45,7 @@ const (
)
type protocol struct {
+ stack *stack.Stack
}
// Number returns the udp protocol number.
@@ -58,14 +54,14 @@ func (*protocol) Number() tcpip.TransportProtocolNumber {
}
// NewEndpoint creates a new udp endpoint.
-func (*protocol) NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
- return newEndpoint(stack, netProto, waiterQueue), nil
+func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+ return newEndpoint(p.stack, netProto, waiterQueue), nil
}
// NewRawEndpoint creates a new raw UDP endpoint. It implements
// stack.TransportProtocol.NewRawEndpoint.
-func (p *protocol) NewRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
- return raw.NewEndpoint(stack, netProto, header.UDPProtocolNumber, waiterQueue)
+func (p *protocol) NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+ return raw.NewEndpoint(p.stack, netProto, header.UDPProtocolNumber, waiterQueue)
}
// MinimumPacketSize returns the minimum valid udp packet size.
@@ -119,6 +115,6 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
}
// NewProtocol returns a UDP transport protocol.
-func NewProtocol() stack.TransportProtocol {
- return &protocol{}
+func NewProtocol(s *stack.Stack) stack.TransportProtocol {
+ return &protocol{stack: s}
}
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index 64a5fc696..bac084acf 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -294,8 +294,8 @@ type testContext struct {
func newDualTestContext(t *testing.T, mtu uint32) *testContext {
t.Helper()
return newDualTestContextWithOptions(t, mtu, stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
}
@@ -532,8 +532,8 @@ func newMinPayload(minSize int) []byte {
func TestBindToDeviceOption(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()}})
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol}})
ep, err := s.NewEndpoint(udp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
@@ -807,8 +807,8 @@ func TestV4ReadSelfSource(t *testing.T) {
} {
t.Run(tt.name, func(t *testing.T) {
c := newDualTestContextWithOptions(t, defaultMTU, stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
HandleLocal: tt.handleLocal,
})
defer c.cleanup()
@@ -1466,6 +1466,30 @@ func TestNoChecksum(t *testing.T) {
}
}
+var _ stack.NetworkInterface = (*testInterface)(nil)
+
+type testInterface struct{}
+
+func (*testInterface) ID() tcpip.NICID {
+ return 0
+}
+
+func (*testInterface) IsLoopback() bool {
+ return false
+}
+
+func (*testInterface) Name() string {
+ return ""
+}
+
+func (*testInterface) Enabled() bool {
+ return true
+}
+
+func (*testInterface) LinkEndpoint() stack.LinkEndpoint {
+ return nil
+}
+
func TestTTL(t *testing.T) {
for _, flow := range []testFlow{unicastV4, unicastV4in6, unicastV6, unicastV6Only, multicastV4, multicastV4in6, multicastV6, broadcast, broadcastIn6} {
t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) {
@@ -1483,16 +1507,19 @@ func TestTTL(t *testing.T) {
if flow.isMulticast() {
wantTTL = multicastTTL
} else {
- var p stack.NetworkProtocol
+ var p stack.NetworkProtocolFactory
+ var n tcpip.NetworkProtocolNumber
if flow.isV4() {
- p = ipv4.NewProtocol()
+ p = ipv4.NewProtocol
+ n = ipv4.ProtocolNumber
} else {
- p = ipv6.NewProtocol()
+ p = ipv6.NewProtocol
+ n = ipv6.ProtocolNumber
}
- ep := p.NewEndpoint(0, nil, nil, nil, nil, stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
- }))
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{p},
+ })
+ ep := s.NetworkProtocolInstance(n).NewEndpoint(&testInterface{}, nil, nil, nil)
wantTTL = ep.DefaultTTL()
ep.Close()
}
@@ -1516,18 +1543,6 @@ func TestSetTTL(t *testing.T) {
c.t.Fatalf("SetSockOptInt(TTLOption, %d) failed: %s", wantTTL, err)
}
- var p stack.NetworkProtocol
- if flow.isV4() {
- p = ipv4.NewProtocol()
- } else {
- p = ipv6.NewProtocol()
- }
- ep := p.NewEndpoint(0, nil, nil, nil, nil, stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
- }))
- ep.Close()
-
testWrite(c, flow, checker.TTL(wantTTL))
})
}
@@ -2357,9 +2372,8 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
-
- TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
e := channel.New(0, defaultMTU, "")
if err := s.CreateNIC(nicID1, e); err != nil {