-rw-r--r-- .travis.yml | 22
-rw-r--r-- benchmarks/tcp/tcp_proxy.go | 8
-rw-r--r-- pkg/abi/linux/BUILD | 1
-rw-r--r-- pkg/abi/linux/fadvise.go | 24
-rw-r--r-- pkg/bpf/interpreter_test.go | 2
-rw-r--r-- pkg/cpuid/cpuid_arm64.go | 5
-rw-r--r-- pkg/cpuid/cpuid_x86.go | 7
-rw-r--r-- pkg/seccomp/seccomp_rules.go | 4
-rw-r--r-- pkg/sentry/control/logging.go | 4
-rw-r--r-- pkg/sentry/device/device.go | 3
-rw-r--r-- pkg/sentry/fs/filesystems.go | 14
-rw-r--r-- pkg/sentry/fs/host/tty.go | 6
-rw-r--r-- pkg/sentry/fsimpl/devpts/BUILD | 2
-rw-r--r-- pkg/sentry/fsimpl/devpts/devpts.go | 3
-rw-r--r-- pkg/sentry/fsimpl/devpts/master.go | 14
-rw-r--r-- pkg/sentry/fsimpl/devpts/slave.go | 14
-rw-r--r-- pkg/sentry/fsimpl/ext/BUILD | 2
-rw-r--r-- pkg/sentry/fsimpl/ext/directory.go | 11
-rw-r--r-- pkg/sentry/fsimpl/ext/inode.go | 3
-rw-r--r-- pkg/sentry/fsimpl/ext/regular_file.go | 11
-rw-r--r-- pkg/sentry/fsimpl/ext/symlink.go | 1
-rw-r--r-- pkg/sentry/fsimpl/gofer/BUILD | 1
-rw-r--r-- pkg/sentry/fsimpl/gofer/directory.go | 5
-rw-r--r-- pkg/sentry/fsimpl/gofer/filesystem.go | 30
-rw-r--r-- pkg/sentry/fsimpl/gofer/gofer.go | 30
-rw-r--r-- pkg/sentry/fsimpl/gofer/regular_file.go | 25
-rw-r--r-- pkg/sentry/fsimpl/gofer/special_file.go | 30
-rw-r--r-- pkg/sentry/fsimpl/host/BUILD | 2
-rw-r--r-- pkg/sentry/fsimpl/host/host.go | 55
-rw-r--r-- pkg/sentry/fsimpl/host/tty.go | 17
-rw-r--r-- pkg/sentry/fsimpl/host/util.go | 10
-rw-r--r-- pkg/sentry/fsimpl/kernfs/BUILD | 3
-rw-r--r-- pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 16
-rw-r--r-- pkg/sentry/fsimpl/kernfs/fd_impl_util.go | 16
-rw-r--r-- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 3
-rw-r--r-- pkg/sentry/fsimpl/kernfs/kernfs.go | 8
-rw-r--r-- pkg/sentry/fsimpl/kernfs/kernfs_test.go | 5
-rw-r--r-- pkg/sentry/fsimpl/overlay/BUILD | 2
-rw-r--r-- pkg/sentry/fsimpl/overlay/directory.go | 22
-rw-r--r-- pkg/sentry/fsimpl/overlay/overlay.go | 14
-rw-r--r-- pkg/sentry/fsimpl/pipefs/BUILD | 1
-rw-r--r-- pkg/sentry/fsimpl/pipefs/pipefs.go | 3
-rw-r--r-- pkg/sentry/fsimpl/proc/BUILD | 2
-rw-r--r-- pkg/sentry/fsimpl/proc/subtasks.go | 3
-rw-r--r-- pkg/sentry/fsimpl/proc/task.go | 3
-rw-r--r-- pkg/sentry/fsimpl/proc/task_fds.go | 6
-rw-r--r-- pkg/sentry/fsimpl/proc/task_files.go | 14
-rw-r--r-- pkg/sentry/fsimpl/proc/tasks.go | 3
-rw-r--r-- pkg/sentry/fsimpl/sys/BUILD | 1
-rw-r--r-- pkg/sentry/fsimpl/sys/sys.go | 3
-rw-r--r-- pkg/sentry/fsimpl/tmpfs/BUILD | 1
-rw-r--r-- pkg/sentry/fsimpl/tmpfs/filesystem.go | 23
-rw-r--r-- pkg/sentry/fsimpl/tmpfs/regular_file.go | 17
-rw-r--r-- pkg/sentry/fsimpl/tmpfs/regular_file_test.go | 33
-rw-r--r-- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 21
-rw-r--r-- pkg/sentry/kernel/fd_table.go | 10
-rw-r--r-- pkg/sentry/kernel/pipe/BUILD | 2
-rw-r--r-- pkg/sentry/kernel/pipe/vfs.go | 18
-rw-r--r-- pkg/sentry/socket/hostinet/BUILD | 2
-rw-r--r-- pkg/sentry/socket/hostinet/socket.go | 8
-rw-r--r-- pkg/sentry/socket/hostinet/socket_vfs2.go | 14
-rw-r--r-- pkg/sentry/socket/netfilter/netfilter.go | 103
-rw-r--r-- pkg/sentry/socket/netlink/BUILD | 2
-rw-r--r-- pkg/sentry/socket/netlink/socket_vfs2.go | 14
-rw-r--r-- pkg/sentry/socket/netstack/BUILD | 2
-rw-r--r-- pkg/sentry/socket/netstack/netstack.go | 1
-rw-r--r-- pkg/sentry/socket/netstack/netstack_vfs2.go | 14
-rw-r--r-- pkg/sentry/socket/netstack/stack.go | 20
-rw-r--r-- pkg/sentry/socket/unix/BUILD | 2
-rw-r--r-- pkg/sentry/socket/unix/transport/connectioned.go | 3
-rw-r--r-- pkg/sentry/socket/unix/unix_vfs2.go | 16
-rw-r--r-- pkg/sentry/syscalls/linux/sys_file.go | 25
-rw-r--r-- pkg/sentry/syscalls/linux/vfs2/fd.go | 76
-rw-r--r-- pkg/sentry/syscalls/linux/vfs2/filesystem.go | 3
-rw-r--r-- pkg/sentry/syscalls/linux/vfs2/vfs2.go | 2
-rw-r--r-- pkg/sentry/vfs/BUILD | 2
-rw-r--r-- pkg/sentry/vfs/README.md | 2
-rw-r--r-- pkg/sentry/vfs/file_description.go | 18
-rw-r--r-- pkg/sentry/vfs/file_description_impl_util.go | 25
-rw-r--r-- pkg/sentry/vfs/lock.go (renamed from pkg/sentry/vfs/lock/lock.go) | 43
-rw-r--r-- pkg/sentry/vfs/lock/BUILD | 13
-rw-r--r-- pkg/sentry/vfs/permissions.go | 31
-rw-r--r-- pkg/tcpip/link/nested/BUILD | 31
-rw-r--r-- pkg/tcpip/link/nested/nested.go | 131
-rw-r--r-- pkg/tcpip/link/nested/nested_test.go | 105
-rw-r--r-- pkg/tcpip/link/sniffer/BUILD | 1
-rw-r--r-- pkg/tcpip/link/sniffer/sniffer.go | 75
-rw-r--r-- pkg/tcpip/stack/iptables.go | 18
-rw-r--r-- pkg/tcpip/stack/iptables_types.go | 27
-rw-r--r-- pkg/tcpip/stack/ndp_test.go | 157
-rw-r--r-- pkg/tcpip/stack/stack.go | 6
-rw-r--r-- pkg/tcpip/stack/stack_test.go | 2
-rw-r--r-- pkg/tcpip/tcpip.go | 32
-rw-r--r-- pkg/tcpip/transport/raw/endpoint.go | 72
-rw-r--r-- pkg/tcpip/transport/tcp/BUILD | 10
-rw-r--r-- pkg/tcpip/transport/tcp/connect.go | 12
-rw-r--r-- pkg/tcpip/transport/tcp/endpoint.go | 16
-rw-r--r-- pkg/tcpip/transport/tcp/endpoint_state.go | 2
-rw-r--r-- pkg/tcpip/transport/tcp/protocol.go | 59
-rw-r--r-- pkg/tcpip/transport/tcp/tcp_sack_test.go | 4
-rw-r--r-- pkg/tcpip/transport/tcp/tcp_test.go | 41
-rw-r--r-- pkg/tcpip/transport/tcp/testing/context/context.go | 10
-rw-r--r-- pkg/tcpip/transport/tcp/timer.go | 1
-rw-r--r-- pkg/tcpip/transport/tcp/timer_test.go | 47
-rw-r--r-- pkg/tcpip/transport/udp/endpoint.go | 75
-rw-r--r-- pkg/tcpip/transport/udp/protocol.go | 70
-rw-r--r-- pkg/tcpip/transport/udp/udp_test.go | 320
-rw-r--r-- pkg/test/dockerutil/dockerutil.go | 53
-rw-r--r-- runsc/boot/loader.go | 8
-rwxr-xr-x scripts/iptables_tests.sh | 3
-rw-r--r-- test/README.md | 4
-rw-r--r-- test/e2e/integration_test.go | 33
-rw-r--r-- test/image/image_test.go | 35
-rw-r--r-- test/iptables/iptables_test.go | 7
-rw-r--r-- test/packetimpact/runner/packetimpact_test.go | 10
-rw-r--r-- test/runner/defs.bzl | 2
-rw-r--r-- test/runner/runner.go | 6
-rw-r--r-- test/runtimes/runner/main.go | 7
-rw-r--r-- test/syscalls/BUILD | 29
-rw-r--r-- test/syscalls/linux/BUILD | 1
-rw-r--r-- test/syscalls/linux/fcntl.cc | 96
-rw-r--r-- test/syscalls/linux/link.cc | 15
-rw-r--r-- test/syscalls/linux/open.cc | 6
-rw-r--r-- test/syscalls/linux/proc.cc | 13
-rw-r--r-- test/syscalls/linux/proc_net.cc | 5
-rw-r--r-- test/syscalls/linux/raw_socket_ipv4.cc | 379
-rw-r--r-- test/syscalls/linux/socket_ip_unbound.cc | 2
-rw-r--r-- test/syscalls/linux/socket_ipv4_udp_unbound.cc | 216
-rw-r--r-- test/syscalls/linux/udp_socket_test_cases.cc | 115
-rw-r--r-- test/syscalls/linux/xattr.cc | 5
-rw-r--r-- test/util/test_util.h | 1
-rw-r--r-- tools/nogo/matchers.go | 29
132 files changed, 2678 insertions, 786 deletions
diff --git a/.travis.yml b/.travis.yml
index 4cb202b7d..9d3141f38 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,27 @@
language: shell
dist: xenial
+git:
+ clone: false # Clone manually in before_install
+before_install:
+ - set -e -o pipefail
+ - |
+ if [ "${TRAVIS_PULL_REQUEST}" = false ]; then
+ # This is not a PR build, fetch and checkout the commit being tested
+ git clone -q --depth 1 "https://github.com/${TRAVIS_REPO_SLUG}.git" "${TRAVIS_REPO_SLUG}"
+ cd "${TRAVIS_REPO_SLUG}"
+ git fetch origin "${TRAVIS_COMMIT}" --depth 1
+ git checkout -qf "${TRAVIS_COMMIT}"
+ else
+ # This is a PR build, simulate +refs/pull/{num}/merge.
+ # We can do that by fetching +refs/pull/{num}/head and cherry picking it
+ # onto the target branch.
+ git clone -q --branch "${TRAVIS_BRANCH}" --depth 1 "https://github.com/${TRAVIS_REPO_SLUG}.git" "${TRAVIS_REPO_SLUG}"
+ cd "${TRAVIS_REPO_SLUG}"
+ git fetch origin "+refs/pull/${TRAVIS_PULL_REQUEST}/head" --depth 1
+ git config --global user.email "$(git log -1 FETCH_HEAD --pretty="%cE")"
+ git config --global user.name "$(git log -1 FETCH_HEAD --pretty="%aN")"
+ git cherry-pick --strategy=recursive -X theirs --keep-redundant-commits FETCH_HEAD
+ fi
cache:
directories:
- /home/travis/.cache/bazel/
diff --git a/benchmarks/tcp/tcp_proxy.go b/benchmarks/tcp/tcp_proxy.go
index f5aa0b515..b3a4dbea3 100644
--- a/benchmarks/tcp/tcp_proxy.go
+++ b/benchmarks/tcp/tcp_proxy.go
@@ -228,19 +228,19 @@ func newNetstackImpl(mode string) (impl, error) {
})
// Set protocol options.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(*sack)); err != nil {
- return nil, fmt.Errorf("SetTransportProtocolOption for SACKEnabled failed: %v", err)
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(*sack)); err != nil {
+ return nil, fmt.Errorf("SetTransportProtocolOption for SACKEnabled failed: %s", err)
}
// Enable Receive Buffer Auto-Tuning.
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(*moderateRecvBuf)); err != nil {
- return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
+ return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err)
}
// Set Congestion Control to cubic if requested.
if *cubic {
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.CongestionControlOption("cubic")); err != nil {
- return nil, fmt.Errorf("SetTransportProtocolOption for CongestionControlOption(cubic) failed: %v", err)
+ return nil, fmt.Errorf("SetTransportProtocolOption for CongestionControlOption(cubic) failed: %s", err)
}
}
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD
index 114b516e2..2b789c4ec 100644
--- a/pkg/abi/linux/BUILD
+++ b/pkg/abi/linux/BUILD
@@ -23,6 +23,7 @@ go_library(
"errors.go",
"eventfd.go",
"exec.go",
+ "fadvise.go",
"fcntl.go",
"file.go",
"file_amd64.go",
diff --git a/pkg/abi/linux/fadvise.go b/pkg/abi/linux/fadvise.go
new file mode 100644
index 000000000..b06ff9964
--- /dev/null
+++ b/pkg/abi/linux/fadvise.go
@@ -0,0 +1,24 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+const (
+ POSIX_FADV_NORMAL = 0
+ POSIX_FADV_RANDOM = 1
+ POSIX_FADV_SEQUENTIAL = 2
+ POSIX_FADV_WILLNEED = 3
+ POSIX_FADV_DONTNEED = 4
+ POSIX_FADV_NOREUSE = 5
+)
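
These constants mirror the Linux ABI advice values for posix_fadvise(2). As a rough, hypothetical sketch of how a syscall implementation might validate the advice argument before treating the hint as a no-op (this is illustrative only, not the fadvise64 handler added elsewhere in this change; validateFadvise is an invented helper name):

package example

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/syserror"
)

// validateFadvise accepts only the POSIX_FADV_* advice values defined above
// and rejects anything else with EINVAL, matching Linux. The hints themselves
// can safely be treated as no-ops by a sentry that does not cache file data
// on its own behalf.
func validateFadvise(advice int32) error {
	switch advice {
	case linux.POSIX_FADV_NORMAL,
		linux.POSIX_FADV_RANDOM,
		linux.POSIX_FADV_SEQUENTIAL,
		linux.POSIX_FADV_WILLNEED,
		linux.POSIX_FADV_DONTNEED,
		linux.POSIX_FADV_NOREUSE:
		return nil
	default:
		return syserror.EINVAL
	}
}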
diff --git a/pkg/bpf/interpreter_test.go b/pkg/bpf/interpreter_test.go
index 547921d0a..c85d786b9 100644
--- a/pkg/bpf/interpreter_test.go
+++ b/pkg/bpf/interpreter_test.go
@@ -767,7 +767,7 @@ func TestSimpleFilter(t *testing.T) {
expectedRet: 0,
},
{
- desc: "Whitelisted syscall is allowed",
+ desc: "Allowed syscall is indeed allowed",
seccompData: seccompData{nr: 231 /* __NR_exit_group */, arch: 0xc000003e},
expectedRet: 0x7fff0000,
},
diff --git a/pkg/cpuid/cpuid_arm64.go b/pkg/cpuid/cpuid_arm64.go
index 08381c1c0..ac7bb6774 100644
--- a/pkg/cpuid/cpuid_arm64.go
+++ b/pkg/cpuid/cpuid_arm64.go
@@ -312,8 +312,9 @@ func HostFeatureSet() *FeatureSet {
}
}
-// Reads bogomips from host /proc/cpuinfo. Must run before whitelisting.
-// This value is used to create the fake /proc/cpuinfo from a FeatureSet.
+// Reads bogomips from host /proc/cpuinfo. Must run before syscall filter
+// installation. This value is used to create the fake /proc/cpuinfo from a
+// FeatureSet.
func initCPUInfo() {
cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo")
if err != nil {
diff --git a/pkg/cpuid/cpuid_x86.go b/pkg/cpuid/cpuid_x86.go
index 562f8f405..17a89c00d 100644
--- a/pkg/cpuid/cpuid_x86.go
+++ b/pkg/cpuid/cpuid_x86.go
@@ -1057,9 +1057,9 @@ func HostFeatureSet() *FeatureSet {
}
}
-// Reads max cpu frequency from host /proc/cpuinfo. Must run before
-// whitelisting. This value is used to create the fake /proc/cpuinfo from a
-// FeatureSet.
+// Reads max cpu frequency from host /proc/cpuinfo. Must run before syscall
+// filter installation. This value is used to create the fake /proc/cpuinfo
+// from a FeatureSet.
func initCPUFreq() {
cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo")
if err != nil {
@@ -1106,7 +1106,6 @@ func initFeaturesFromString() {
}
func init() {
- // initCpuFreq must be run before whitelists are enabled.
initCPUFreq()
initFeaturesFromString()
}
diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go
index 06308cd29..a52dc1b4e 100644
--- a/pkg/seccomp/seccomp_rules.go
+++ b/pkg/seccomp/seccomp_rules.go
@@ -56,7 +56,7 @@ func (a AllowValue) String() (s string) {
return fmt.Sprintf("%#x ", uintptr(a))
}
-// Rule stores the whitelist of syscall arguments.
+// Rule stores the allowed syscall arguments.
//
// For example:
// rule := Rule {
@@ -82,7 +82,7 @@ func (r Rule) String() (s string) {
return
}
-// SyscallRules stores a map of OR'ed whitelist rules indexed by the syscall number.
+// SyscallRules stores a map of OR'ed argument rules indexed by the syscall number.
// If the 'Rules' is empty, we treat it as any argument is allowed.
//
// For example:
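
For orientation only, a hedged sketch of how such rules might be assembled by a caller; the futex syscall number and flag values are illustrative and not taken from this change, and the layout assumes SyscallRules maps syscall numbers to OR'ed Rule lists as the comments above describe:

package example

import (
	"syscall"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/seccomp"
)

// exampleRules allows futex(2) only when its second argument is FUTEX_WAIT or
// FUTEX_WAKE. The two Rule entries are alternatives (OR'ed together), and
// arguments not listed in a Rule may take any value.
var exampleRules = seccomp.SyscallRules{
	syscall.SYS_FUTEX: []seccomp.Rule{
		{seccomp.AllowAny{}, seccomp.AllowValue(linux.FUTEX_WAIT)},
		{seccomp.AllowAny{}, seccomp.AllowValue(linux.FUTEX_WAKE)},
	},
}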
diff --git a/pkg/sentry/control/logging.go b/pkg/sentry/control/logging.go
index 811f24324..8a500a515 100644
--- a/pkg/sentry/control/logging.go
+++ b/pkg/sentry/control/logging.go
@@ -70,8 +70,8 @@ type LoggingArgs struct {
type Logging struct{}
// Change will change the log level and strace arguments. Although
-// this functions signature requires an error it never acctually
-// return san error. It's required by the URPC interface.
+// this functions signature requires an error it never actually
+// returns an error. It's required by the URPC interface.
// Additionally, it may look odd that this is the only method
// attached to an empty struct but this is also part of how
// URPC dispatches.
diff --git a/pkg/sentry/device/device.go b/pkg/sentry/device/device.go
index 69e71e322..f45b2bd2b 100644
--- a/pkg/sentry/device/device.go
+++ b/pkg/sentry/device/device.go
@@ -188,6 +188,9 @@ type MultiDevice struct {
// String stringifies MultiDevice.
func (m *MultiDevice) String() string {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+
buf := bytes.NewBuffer(nil)
buf.WriteString("cache{")
for k, v := range m.cache {
diff --git a/pkg/sentry/fs/filesystems.go b/pkg/sentry/fs/filesystems.go
index 084da2a8d..d41f30bbb 100644
--- a/pkg/sentry/fs/filesystems.go
+++ b/pkg/sentry/fs/filesystems.go
@@ -87,20 +87,6 @@ func RegisterFilesystem(f Filesystem) {
filesystems.registered[f.Name()] = f
}
-// UnregisterFilesystem removes a file system from the global set. To keep the
-// file system set compatible with save/restore, UnregisterFilesystem must be
-// called before save/restore methods.
-//
-// For instance, packages may unregister their file system after it is mounted.
-// This makes sense for pseudo file systems that should not be visible or
-// mountable. See whitelistfs in fs/host/fs.go for one example.
-func UnregisterFilesystem(name string) {
- filesystems.mu.Lock()
- defer filesystems.mu.Unlock()
-
- delete(filesystems.registered, name)
-}
-
// FindFilesystem returns a Filesystem registered at name or (nil, false) if name
// is not a file system type that can be found in /proc/filesystems.
func FindFilesystem(name string) (Filesystem, bool) {
diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go
index cb91355ab..82a02fcb2 100644
--- a/pkg/sentry/fs/host/tty.go
+++ b/pkg/sentry/fs/host/tty.go
@@ -308,9 +308,9 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e
task := kernel.TaskFromContext(ctx)
if task == nil {
// No task? Linux does not have an analog for this case, but
- // tty_check_change is more of a blacklist of cases than a
- // whitelist, and is surprisingly permissive. Allowing the
- // change seems most appropriate.
+ // tty_check_change only blocks specific cases and is
+ // surprisingly permissive. Allowing the change seems
+ // appropriate.
return nil
}
diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD
index cf440dce8..93512c9b6 100644
--- a/pkg/sentry/fsimpl/devpts/BUILD
+++ b/pkg/sentry/fsimpl/devpts/BUILD
@@ -18,12 +18,12 @@ go_library(
"//pkg/context",
"//pkg/safemem",
"//pkg/sentry/arch",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/unimpl",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 9b0e0cca2..e6fda2b4f 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -28,7 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -117,7 +116,7 @@ type rootInode struct {
kernfs.InodeNotSymlink
kernfs.OrderedChildren
- locks lock.FileLocks
+ locks vfs.FileLocks
// Keep a reference to this inode's dentry.
dentry kernfs.Dentry
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index 1d22adbe3..69879498a 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -18,11 +18,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/unimpl"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
@@ -35,7 +35,7 @@ type masterInode struct {
kernfs.InodeNotDirectory
kernfs.InodeNotSymlink
- locks lock.FileLocks
+ locks vfs.FileLocks
// Keep a reference to this inode's dentry.
dentry kernfs.Dentry
@@ -189,6 +189,16 @@ func (mfd *masterFileDescription) Stat(ctx context.Context, opts vfs.StatOptions
return mfd.inode.Stat(fs, opts)
}
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (mfd *masterFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return mfd.Locks().LockPOSIX(ctx, &mfd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (mfd *masterFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return mfd.Locks().UnlockPOSIX(ctx, &mfd.vfsfd, uid, start, length, whence)
+}
+
// maybeEmitUnimplementedEvent emits unimplemented event if cmd is valid.
func maybeEmitUnimplementedEvent(ctx context.Context, cmd uint32) {
switch cmd {
diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go
index 7fe475080..cf1a0f0ac 100644
--- a/pkg/sentry/fsimpl/devpts/slave.go
+++ b/pkg/sentry/fsimpl/devpts/slave.go
@@ -18,10 +18,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
@@ -34,7 +34,7 @@ type slaveInode struct {
kernfs.InodeNotDirectory
kernfs.InodeNotSymlink
- locks lock.FileLocks
+ locks vfs.FileLocks
// Keep a reference to this inode's dentry.
dentry kernfs.Dentry
@@ -185,3 +185,13 @@ func (sfd *slaveFileDescription) Stat(ctx context.Context, opts vfs.StatOptions)
fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem()
return sfd.inode.Stat(fs, opts)
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (sfd *slaveFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return sfd.Locks().LockPOSIX(ctx, &sfd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (sfd *slaveFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return sfd.Locks().UnlockPOSIX(ctx, &sfd.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD
index 973fa0def..ef24f8159 100644
--- a/pkg/sentry/fsimpl/ext/BUILD
+++ b/pkg/sentry/fsimpl/ext/BUILD
@@ -54,13 +54,13 @@ go_library(
"//pkg/safemem",
"//pkg/sentry/arch",
"//pkg/sentry/fs",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/ext/disklayout",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/syscalls/linux",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go
index 43be6928a..357512c7e 100644
--- a/pkg/sentry/fsimpl/ext/directory.go
+++ b/pkg/sentry/fsimpl/ext/directory.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -305,3 +306,13 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in
fd.off = offset
return offset, nil
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *directoryFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *directoryFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go
index 5caaf14ed..30636cf66 100644
--- a/pkg/sentry/fsimpl/ext/inode.go
+++ b/pkg/sentry/fsimpl/ext/inode.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -55,7 +54,7 @@ type inode struct {
// diskInode gives us access to the inode struct on disk. Immutable.
diskInode disklayout.Inode
- locks lock.FileLocks
+ locks vfs.FileLocks
// This is immutable. The first field of the implementations must have inode
// as the first field to ensure temporality.
diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go
index 152036b2e..66d14bb95 100644
--- a/pkg/sentry/fsimpl/ext/regular_file.go
+++ b/pkg/sentry/fsimpl/ext/regular_file.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/safemem"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -149,3 +150,13 @@ func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpt
// TODO(b/134676337): Implement mmap(2).
return syserror.ENODEV
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *regularFileFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *regularFileFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/fsimpl/ext/symlink.go b/pkg/sentry/fsimpl/ext/symlink.go
index acb28d85b..62efd4095 100644
--- a/pkg/sentry/fsimpl/ext/symlink.go
+++ b/pkg/sentry/fsimpl/ext/symlink.go
@@ -66,6 +66,7 @@ func (in *inode) isSymlink() bool {
// O_PATH. For this reason most of the functions return EBADF.
type symlinkFD struct {
fileDescription
+ vfs.NoLockFD
}
// Compiles only if symlinkFD implements vfs.FileDescriptionImpl.
diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD
index 5cdeeaeb5..4a800dcf9 100644
--- a/pkg/sentry/fsimpl/gofer/BUILD
+++ b/pkg/sentry/fsimpl/gofer/BUILD
@@ -69,7 +69,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/usage",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/syserr",
"//pkg/syserror",
"//pkg/unet",
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index b98218753..480510b2a 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -299,3 +299,8 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in
return 0, syserror.EINVAL
}
}
+
+// Sync implements vfs.FileDescriptionImpl.Sync.
+func (fd *directoryFD) Sync(ctx context.Context) error {
+ return fd.dentry().handle.sync(ctx)
+}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 3c467e313..f065c4bad 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -374,13 +374,16 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
return nil
}
if fs.opts.interop == InteropModeShared {
- // The existence of a dentry at name would be inconclusive because the
- // file it represents may have been deleted from the remote filesystem,
- // so we would need to make an RPC to revalidate the dentry. Just
- // attempt the file creation RPC instead. If a file does exist, the RPC
- // will fail with EEXIST like we would have. If the RPC succeeds, and a
- // stale dentry exists, the dentry will fail revalidation next time
- // it's used.
+ if child := parent.children[name]; child != nil && child.isSynthetic() {
+ return syserror.EEXIST
+ }
+ // The existence of a non-synthetic dentry at name would be inconclusive
+ // because the file it represents may have been deleted from the remote
+ // filesystem, so we would need to make an RPC to revalidate the dentry.
+ // Just attempt the file creation RPC instead. If a file does exist, the
+ // RPC will fail with EEXIST like we would have. If the RPC succeeds, and a
+ // stale dentry exists, the dentry will fail revalidation next time it's
+ // used.
return createInRemoteDir(parent, name)
}
if child := parent.children[name]; child != nil {
@@ -518,7 +521,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
if child == nil {
return syserror.ENOENT
}
- } else {
+ } else if child == nil || !child.isSynthetic() {
err = parent.file.unlinkAt(ctx, name, flags)
if err != nil {
if child != nil {
@@ -764,15 +767,17 @@ afterTrailingSymlink:
parent.dirMu.Unlock()
return fd, err
}
+ parent.dirMu.Unlock()
if err != nil {
- parent.dirMu.Unlock()
return nil, err
}
- // Open existing child or follow symlink.
- parent.dirMu.Unlock()
if mustCreate {
return nil, syserror.EEXIST
}
+ if !child.isDir() && rp.MustBeDir() {
+ return nil, syserror.ENOTDIR
+ }
+ // Open existing child or follow symlink.
if child.isSymlink() && rp.ShouldFollowSymlink() {
target, err := child.readlink(ctx, rp.Mount())
if err != nil {
@@ -1193,7 +1198,8 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if newParent.cachedMetadataAuthoritative() {
newParent.dirents = nil
newParent.touchCMtime()
- if renamed.isDir() {
+ if renamed.isDir() && (replaced == nil || !replaced.isDir()) {
+ // Increase the link count if we did not replace another directory.
newParent.incLinks()
}
}
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index ac051b3a7..43c8153a4 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -53,7 +53,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/unet"
"gvisor.dev/gvisor/pkg/usermem"
@@ -665,7 +664,7 @@ type dentry struct {
// endpoint bound to this file.
pipe *pipe.VFSPipe
- locks lock.FileLocks
+ locks vfs.FileLocks
}
// dentryAttrMask returns a p9.AttrMask enabling all attributes used by the
@@ -1207,7 +1206,7 @@ func (d *dentry) setDeleted() {
// We only support xattrs prefixed with "user." (see b/148380782). Currently,
// there is no need to expose any other xattrs through a gofer.
func (d *dentry) listxattr(ctx context.Context, creds *auth.Credentials, size uint64) ([]string, error) {
- if d.file.isNil() {
+ if d.file.isNil() || !d.userXattrSupported() {
return nil, nil
}
xattrMap, err := d.file.listXattr(ctx, size)
@@ -1233,6 +1232,9 @@ func (d *dentry) getxattr(ctx context.Context, creds *auth.Credentials, opts *vf
if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) {
return "", syserror.EOPNOTSUPP
}
+ if !d.userXattrSupported() {
+ return "", syserror.ENODATA
+ }
return d.file.getXattr(ctx, opts.Name, opts.Size)
}
@@ -1246,6 +1248,9 @@ func (d *dentry) setxattr(ctx context.Context, creds *auth.Credentials, opts *vf
if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) {
return syserror.EOPNOTSUPP
}
+ if !d.userXattrSupported() {
+ return syserror.EPERM
+ }
return d.file.setXattr(ctx, opts.Name, opts.Value, opts.Flags)
}
@@ -1259,9 +1264,19 @@ func (d *dentry) removexattr(ctx context.Context, creds *auth.Credentials, name
if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
return syserror.EOPNOTSUPP
}
+ if !d.userXattrSupported() {
+ return syserror.EPERM
+ }
return d.file.removeXattr(ctx, name)
}
+// Extended attributes in the user.* namespace are only supported for regular
+// files and directories.
+func (d *dentry) userXattrSupported() bool {
+ filetype := linux.S_IFMT & atomic.LoadUint32(&d.mode)
+ return filetype == linux.S_IFREG || filetype == linux.S_IFDIR
+}
+
// Preconditions: !d.isSynthetic(). d.isRegularFile() || d.isDirectory().
func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool) error {
// O_TRUNC unconditionally requires us to obtain a new handle (opened with
@@ -1439,9 +1454,14 @@ func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, t f
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error {
+func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
fd.lockLogging.Do(func() {
log.Infof("Range lock using gofer file handled internally.")
})
- return fd.LockFD.LockPOSIX(ctx, uid, t, rng, block)
+ return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
}
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index 0d10cf7ac..404a452c4 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -72,7 +72,9 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
if offset < 0 {
return 0, syserror.EINVAL
}
- if opts.Flags != 0 {
+
+ // Check that flags are supported. Silently ignore RWF_HIPRI.
+ if opts.Flags&^linux.RWF_HIPRI != 0 {
return 0, syserror.EOPNOTSUPP
}
@@ -123,9 +125,12 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
if offset < 0 {
return 0, syserror.EINVAL
}
- if opts.Flags != 0 {
+
+ // Check that flags are supported. Silently ignore RWF_HIPRI.
+ if opts.Flags&^linux.RWF_HIPRI != 0 {
return 0, syserror.EOPNOTSUPP
}
+
limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes())
if err != nil {
return 0, err
@@ -489,15 +494,24 @@ func (d *dentry) writeback(ctx context.Context, offset, size int64) error {
func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
fd.mu.Lock()
defer fd.mu.Unlock()
+ newOffset, err := regularFileSeekLocked(ctx, fd.dentry(), fd.off, offset, whence)
+ if err != nil {
+ return 0, err
+ }
+ fd.off = newOffset
+ return newOffset, nil
+}
+
+// Calculate the new offset for a seek operation on a regular file.
+func regularFileSeekLocked(ctx context.Context, d *dentry, fdOffset, offset int64, whence int32) (int64, error) {
switch whence {
case linux.SEEK_SET:
// Use offset as specified.
case linux.SEEK_CUR:
- offset += fd.off
+ offset += fdOffset
case linux.SEEK_END, linux.SEEK_DATA, linux.SEEK_HOLE:
// Ensure file size is up to date.
- d := fd.dentry()
- if fd.filesystem().opts.interop == InteropModeShared {
+ if !d.cachedMetadataAuthoritative() {
if err := d.updateFromGetattr(ctx); err != nil {
return 0, err
}
@@ -525,7 +539,6 @@ func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (
if offset < 0 {
return 0, syserror.EINVAL
}
- fd.off = offset
return offset, nil
}
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index 289efdd25..a016cbae1 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
@@ -52,7 +51,7 @@ type specialFileFD struct {
off int64
}
-func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *lock.FileLocks, flags uint32) (*specialFileFD, error) {
+func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *vfs.FileLocks, flags uint32) (*specialFileFD, error) {
ftype := d.fileType()
seekable := ftype == linux.S_IFREG
mayBlock := ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK
@@ -130,7 +129,9 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
if fd.seekable && offset < 0 {
return 0, syserror.EINVAL
}
- if opts.Flags != 0 {
+
+ // Check that flags are supported. Silently ignore RWF_HIPRI.
+ if opts.Flags&^linux.RWF_HIPRI != 0 {
return 0, syserror.EOPNOTSUPP
}
@@ -174,7 +175,9 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
if fd.seekable && offset < 0 {
return 0, syserror.EINVAL
}
- if opts.Flags != 0 {
+
+ // Check that flags are supported. Silently ignore RWF_HIPRI.
+ if opts.Flags&^linux.RWF_HIPRI != 0 {
return 0, syserror.EOPNOTSUPP
}
@@ -222,21 +225,12 @@ func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (
}
fd.mu.Lock()
defer fd.mu.Unlock()
- switch whence {
- case linux.SEEK_SET:
- // Use offset as given.
- case linux.SEEK_CUR:
- offset += fd.off
- default:
- // SEEK_END, SEEK_DATA, and SEEK_HOLE aren't supported since it's not
- // clear that file size is even meaningful for these files.
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
+ newOffset, err := regularFileSeekLocked(ctx, fd.dentry(), fd.off, offset, whence)
+ if err != nil {
+ return 0, err
}
- fd.off = offset
- return offset, nil
+ fd.off = newOffset
+ return newOffset, nil
}
// Sync implements vfs.FileDescriptionImpl.Sync.
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index 54f16ad63..44a09d87a 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -27,6 +27,7 @@ go_library(
"//pkg/safemem",
"//pkg/sentry/arch",
"//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/hostfd",
"//pkg/sentry/kernel",
@@ -39,7 +40,6 @@ go_library(
"//pkg/sentry/unimpl",
"//pkg/sentry/uniqueid",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/sync",
"//pkg/syserr",
"//pkg/syserror",
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 5ec5100b8..a65543028 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -28,13 +28,13 @@ import (
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/refs"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/hostfd"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -91,7 +91,9 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
isTTY: opts.IsTTY,
wouldBlock: wouldBlock(uint32(fileType)),
seekable: seekable,
- canMap: canMap(uint32(fileType)),
+ // NOTE(b/38213152): Technically, some obscure char devices can be memory
+ // mapped, but we only allow regular files.
+ canMap: fileType == linux.S_IFREG,
}
i.pf.inode = i
@@ -183,7 +185,7 @@ type inode struct {
kernfs.InodeNotDirectory
kernfs.InodeNotSymlink
- locks lock.FileLocks
+ locks vfs.FileLocks
// When the reference count reaches zero, the host fd is closed.
refs.AtomicRefCount
@@ -460,7 +462,8 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u
// TODO(gvisor.dev/issue/1672): implement behavior corresponding to these allowed flags.
flags &= syscall.O_ACCMODE | syscall.O_DIRECT | syscall.O_NONBLOCK | syscall.O_DSYNC | syscall.O_SYNC | syscall.O_APPEND
- if fileType == syscall.S_IFSOCK {
+ switch fileType {
+ case syscall.S_IFSOCK:
if i.isTTY {
log.Warningf("cannot use host socket fd %d as TTY", i.hostFD)
return nil, syserror.ENOTTY
@@ -472,30 +475,33 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u
}
// Currently, we only allow Unix sockets to be imported.
return unixsocket.NewFileDescription(ep, ep.Type(), flags, mnt, d, &i.locks)
- }
- // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that
- // we don't allow importing arbitrary file types without proper support.
- if i.isTTY {
- fd := &TTYFileDescription{
- fileDescription: fileDescription{inode: i},
- termios: linux.DefaultSlaveTermios,
+ case syscall.S_IFREG, syscall.S_IFIFO, syscall.S_IFCHR:
+ if i.isTTY {
+ fd := &TTYFileDescription{
+ fileDescription: fileDescription{inode: i},
+ termios: linux.DefaultSlaveTermios,
+ }
+ fd.LockFD.Init(&i.locks)
+ vfsfd := &fd.vfsfd
+ if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
+ return vfsfd, nil
}
+
+ fd := &fileDescription{inode: i}
fd.LockFD.Init(&i.locks)
vfsfd := &fd.vfsfd
if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
return nil, err
}
return vfsfd, nil
- }
- fd := &fileDescription{inode: i}
- fd.LockFD.Init(&i.locks)
- vfsfd := &fd.vfsfd
- if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
- return nil, err
+ default:
+ log.Warningf("cannot import host fd %d with file type %o", i.hostFD, fileType)
+ return nil, syserror.EPERM
}
- return vfsfd, nil
}
// fileDescription is embedded by host fd implementations of FileDescriptionImpl.
@@ -688,7 +694,8 @@ func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (i
// Sync implements FileDescriptionImpl.
func (f *fileDescription) Sync(context.Context) error {
- // TODO(gvisor.dev/issue/1672): Currently we do not support the SyncData optimization, so we always sync everything.
+ // TODO(gvisor.dev/issue/1672): Currently we do not support the SyncData
+ // optimization, so we always sync everything.
return unix.Fsync(f.inode.hostFD)
}
@@ -718,3 +725,13 @@ func (f *fileDescription) EventUnregister(e *waiter.Entry) {
func (f *fileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
return fdnotifier.NonBlockingPoll(int32(f.inode.hostFD), mask)
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (f *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return f.Locks().LockPOSIX(ctx, &f.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (f *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return f.Locks().UnlockPOSIX(ctx, &f.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go
index 68af6e5af..4ee9270cc 100644
--- a/pkg/sentry/fsimpl/host/tty.go
+++ b/pkg/sentry/fsimpl/host/tty.go
@@ -18,6 +18,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/unimpl"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -325,9 +326,9 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal)
task := kernel.TaskFromContext(ctx)
if task == nil {
// No task? Linux does not have an analog for this case, but
- // tty_check_change is more of a blacklist of cases than a
- // whitelist, and is surprisingly permissive. Allowing the
- // change seems most appropriate.
+ // tty_check_change only blocks specific cases and is
+ // surprisingly permissive. Allowing the change seems
+ // appropriate.
return nil
}
@@ -377,3 +378,13 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal)
_ = pg.SendSignal(kernel.SignalInfoPriv(sig))
return kernel.ERESTARTSYS
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (t *TTYFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, typ fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return t.Locks().LockPOSIX(ctx, &t.vfsfd, uid, typ, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (t *TTYFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return t.Locks().UnlockPOSIX(ctx, &t.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go
index 2bc757b1a..412bdb2eb 100644
--- a/pkg/sentry/fsimpl/host/util.go
+++ b/pkg/sentry/fsimpl/host/util.go
@@ -49,16 +49,6 @@ func wouldBlock(fileType uint32) bool {
return fileType == syscall.S_IFIFO || fileType == syscall.S_IFCHR || fileType == syscall.S_IFSOCK
}
-// canMap returns true if a file with fileType is allowed to be memory mapped.
-// This is ported over from VFS1, but it's probably not the best way for us
-// to check if a file can be memory mapped.
-func canMap(fileType uint32) bool {
- // TODO(gvisor.dev/issue/1672): Also allow "special files" to be mapped (see fs/host:canMap()).
- //
- // TODO(b/38213152): Some obscure character devices can be mapped.
- return fileType == syscall.S_IFREG
-}
-
// isBlockError checks if an error is EAGAIN or EWOULDBLOCK.
// If so, they can be transformed into syserror.ErrWouldBlock.
func isBlockError(err error) bool {
diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD
index 0299dbde9..179df6c1e 100644
--- a/pkg/sentry/fsimpl/kernfs/BUILD
+++ b/pkg/sentry/fsimpl/kernfs/BUILD
@@ -45,11 +45,11 @@ go_library(
"//pkg/fspath",
"//pkg/log",
"//pkg/refs",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
@@ -68,7 +68,6 @@ go_test(
"//pkg/sentry/fsimpl/testutil",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/syserror",
"//pkg/usermem",
"@com_github_google_go-cmp//cmp:go_default_library",
diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
index 6418de0a3..c1215b70a 100644
--- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
+++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
@@ -19,9 +19,9 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -39,7 +39,7 @@ type DynamicBytesFile struct {
InodeNotDirectory
InodeNotSymlink
- locks lock.FileLocks
+ locks vfs.FileLocks
data vfs.DynamicBytesSource
}
@@ -86,7 +86,7 @@ type DynamicBytesFD struct {
}
// Init initializes a DynamicBytesFD.
-func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, locks *lock.FileLocks, flags uint32) error {
+func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, locks *vfs.FileLocks, flags uint32) error {
fd.LockFD.Init(locks)
if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil {
return err
@@ -135,3 +135,13 @@ func (fd *DynamicBytesFD) SetStat(context.Context, vfs.SetStatOptions) error {
// DynamicBytesFiles are immutable.
return syserror.EPERM
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *DynamicBytesFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *DynamicBytesFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index 33a5968ca..5f7853a2a 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -19,10 +19,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -57,7 +57,7 @@ type GenericDirectoryFD struct {
// NewGenericDirectoryFD creates a new GenericDirectoryFD and returns its
// dentry.
-func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, locks *lock.FileLocks, opts *vfs.OpenOptions) (*GenericDirectoryFD, error) {
+func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions) (*GenericDirectoryFD, error) {
fd := &GenericDirectoryFD{}
if err := fd.Init(children, locks, opts); err != nil {
return nil, err
@@ -71,7 +71,7 @@ func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildre
// Init initializes a GenericDirectoryFD. Use it when overriding
// GenericDirectoryFD. Caller must call fd.VFSFileDescription.Init() with the
// correct implementation.
-func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *lock.FileLocks, opts *vfs.OpenOptions) error {
+func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions) error {
if vfs.AccessTypesForOpenFlags(opts)&vfs.MayWrite != 0 {
// Can't open directories for writing.
return syserror.EISDIR
@@ -235,3 +235,13 @@ func (fd *GenericDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptio
inode := fd.vfsfd.VirtualDentry().Dentry().Impl().(*Dentry).inode
return inode.SetStat(ctx, fd.filesystem(), creds, opts)
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *GenericDirectoryFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *GenericDirectoryFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 0e4927215..650bd7b88 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -557,7 +556,7 @@ type StaticDirectory struct {
InodeNoDynamicLookup
OrderedChildren
- locks lock.FileLocks
+ locks vfs.FileLocks
}
var _ Inode = (*StaticDirectory)(nil)
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index bbee8ccda..07a9dc830 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -425,10 +425,10 @@ type inodeDynamicLookup interface {
// IterDirents is used to iterate over dynamically created entries. It invokes
// cb on each entry in the directory represented by the FileDescription.
// 'offset' is the offset for the entire IterDirents call, which may include
- // results from the caller. 'relOffset' is the offset inside the entries
- // returned by this IterDirents invocation. In other words,
- // 'offset+relOffset+1' is the value that should be set in vfs.Dirent.NextOff,
- // while 'relOffset' is the place where iteration should start from.
+ // results from the caller (e.g. "." and ".."). 'relOffset' is the offset
+ // inside the entries returned by this IterDirents invocation. In other words,
+ // 'offset' should be used to calculate each vfs.Dirent.NextOff as well as
+ // the return value, while 'relOffset' is the place to start iteration.
IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error)
}
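
To make that offset contract concrete, here is a hedged, hypothetical sketch of a dynamic directory; the namesDir type and its fields are invented for illustration, while vfs.IterDirentsCallback, vfs.Dirent, and linux.DT_REG are the real types the interface above refers to:

package example

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/sentry/vfs"
)

// namesDir is a hypothetical dynamic directory whose entries come from a
// string slice; it stands in for a kernfs inode implementing IterDirents.
type namesDir struct {
	names []string
	inos  map[string]uint64
}

// IterDirents resumes at relOffset within this inode's own entries, while
// offset (which already accounts for entries emitted by the caller, such as
// "." and "..") is the basis for each NextOff and for the returned offset.
func (d *namesDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
	if relOffset >= int64(len(d.names)) {
		return offset, nil
	}
	for _, name := range d.names[relOffset:] {
		dirent := vfs.Dirent{
			Name:    name,
			Type:    linux.DT_REG,
			Ino:     d.inos[name],
			NextOff: offset + 1,
		}
		if err := cb.Handle(dirent); err != nil {
			return offset, err
		}
		offset++
	}
	return offset, nil
}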
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index 6749facf7..dc407eb1d 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -27,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -103,7 +102,7 @@ type readonlyDir struct {
kernfs.InodeDirectoryNoNewChildren
kernfs.OrderedChildren
- locks lock.FileLocks
+ locks vfs.FileLocks
dentry kernfs.Dentry
}
@@ -133,7 +132,7 @@ type dir struct {
kernfs.InodeNoDynamicLookup
kernfs.OrderedChildren
- locks lock.FileLocks
+ locks vfs.FileLocks
fs *filesystem
dentry kernfs.Dentry
diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD
index f9413bbdd..8cf5b35d3 100644
--- a/pkg/sentry/fsimpl/overlay/BUILD
+++ b/pkg/sentry/fsimpl/overlay/BUILD
@@ -29,11 +29,11 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fspath",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/overlay/directory.go b/pkg/sentry/fsimpl/overlay/directory.go
index 6f47167d3..f5c2462a5 100644
--- a/pkg/sentry/fsimpl/overlay/directory.go
+++ b/pkg/sentry/fsimpl/overlay/directory.go
@@ -263,3 +263,25 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in
return 0, syserror.EINVAL
}
}
+
+// Sync implements vfs.FileDescriptionImpl.Sync. Forwards sync to the upper
+// layer, if there is one. The lower layer doesn't need to sync because it
+// never changes.
+func (fd *directoryFD) Sync(ctx context.Context) error {
+ d := fd.dentry()
+ if !d.isCopiedUp() {
+ return nil
+ }
+ vfsObj := d.fs.vfsfs.VirtualFilesystem()
+ pop := vfs.PathOperation{
+ Root: d.upperVD,
+ Start: d.upperVD,
+ }
+ upperFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &pop, &vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_DIRECTORY})
+ if err != nil {
+ return err
+ }
+ err = upperFD.Sync(ctx)
+ upperFD.DecRef()
+ return err
+}
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index e660d0e2c..e11a3ff19 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -35,9 +35,9 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -415,7 +415,7 @@ type dentry struct {
devMinor uint32
ino uint64
- locks lock.FileLocks
+ locks vfs.FileLocks
}
// newDentry creates a new dentry. The dentry initially has no references; it
@@ -610,3 +610,13 @@ func (fd *fileDescription) filesystem() *filesystem {
func (fd *fileDescription) dentry() *dentry {
return fd.vfsfd.Dentry().Impl().(*dentry)
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD
index c618dbe6c..5950a2d59 100644
--- a/pkg/sentry/fsimpl/pipefs/BUILD
+++ b/pkg/sentry/fsimpl/pipefs/BUILD
@@ -15,7 +15,6 @@ go_library(
"//pkg/sentry/kernel/pipe",
"//pkg/sentry/kernel/time",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/syserror",
"//pkg/usermem",
],
diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go
index e4dabaa33..dd7eaf4a8 100644
--- a/pkg/sentry/fsimpl/pipefs/pipefs.go
+++ b/pkg/sentry/fsimpl/pipefs/pipefs.go
@@ -27,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -82,7 +81,7 @@ type inode struct {
kernfs.InodeNotSymlink
kernfs.InodeNoopRefCount
- locks lock.FileLocks
+ locks vfs.FileLocks
pipe *pipe.VFSPipe
ino uint64
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
index 351ba4ee9..6014138ff 100644
--- a/pkg/sentry/fsimpl/proc/BUILD
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -22,6 +22,7 @@ go_library(
"//pkg/log",
"//pkg/refs",
"//pkg/safemem",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/fsbridge",
"//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/inet",
@@ -35,7 +36,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/usage",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/syserror",
"//pkg/tcpip/header",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index e2cdb7ee9..36a89540c 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -38,7 +37,7 @@ type subtasksInode struct {
kernfs.OrderedChildren
kernfs.AlwaysValid
- locks lock.FileLocks
+ locks vfs.FileLocks
fs *filesystem
task *kernel.Task
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index 44078a765..8bb2b0ce1 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -39,7 +38,7 @@ type taskInode struct {
kernfs.InodeAttrs
kernfs.OrderedChildren
- locks lock.FileLocks
+ locks vfs.FileLocks
task *kernel.Task
}
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index ef6c1d04f..fea29e5f0 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -27,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -54,7 +53,7 @@ func taskFDExists(t *kernel.Task, fd int32) bool {
}
type fdDir struct {
- locks lock.FileLocks
+ locks vfs.FileLocks
fs *filesystem
task *kernel.Task
@@ -65,7 +64,7 @@ type fdDir struct {
}
// IterDirents implements kernfs.inodeDynamicLookup.
-func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, absOffset, relOffset int64) (int64, error) {
+func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
var fds []int32
i.task.WithMuLocked(func(t *kernel.Task) {
if fdTable := t.FDTable(); fdTable != nil {
@@ -73,7 +72,6 @@ func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, abs
}
})
- offset := absOffset + relOffset
typ := uint8(linux.DT_REG)
if i.produceSymlink {
typ = linux.DT_LNK
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index e5eaa91cd..ba4405026 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/safemem"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -30,7 +31,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -777,7 +777,7 @@ type namespaceInode struct {
kernfs.InodeNotDirectory
kernfs.InodeNotSymlink
- locks lock.FileLocks
+ locks vfs.FileLocks
}
var _ kernfs.Inode = (*namespaceInode)(nil)
@@ -830,3 +830,13 @@ func (fd *namespaceFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) err
func (fd *namespaceFD) Release() {
fd.inode.DecRef()
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *namespaceFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *namespaceFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index 58c8b9d05..2f214d0c2 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -44,7 +43,7 @@ type tasksInode struct {
kernfs.OrderedChildren
kernfs.AlwaysValid
- locks lock.FileLocks
+ locks vfs.FileLocks
fs *filesystem
pidns *kernel.PIDNamespace
diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD
index 237f17def..a741e2bb6 100644
--- a/pkg/sentry/fsimpl/sys/BUILD
+++ b/pkg/sentry/fsimpl/sys/BUILD
@@ -15,7 +15,6 @@ go_library(
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/syserror",
],
)
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index b84463d3a..fe02f7ee9 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -101,7 +100,7 @@ type dir struct {
kernfs.InodeDirectoryNoNewChildren
kernfs.OrderedChildren
- locks lock.FileLocks
+ locks vfs.FileLocks
dentry kernfs.Dentry
}
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
index 062321cbc..e73732a6b 100644
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -62,7 +62,6 @@ go_library(
"//pkg/sentry/uniqueid",
"//pkg/sentry/usage",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/sentry/vfs/memxattr",
"//pkg/sync",
"//pkg/syserror",
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 72399b321..637d84e04 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -79,7 +79,7 @@ afterSymlink:
}
if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
// Symlink traversal updates access time.
- atomic.StoreInt64(&d.inode.atime, d.inode.fs.clock.Now().Nanoseconds())
+ child.inode.touchAtime(rp.Mount())
if err := rp.HandleSymlink(symlink.target); err != nil {
return nil, err
}
@@ -237,18 +237,22 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return syserror.EXDEV
}
d := vd.Dentry().Impl().(*dentry)
- if d.inode.isDir() {
+ i := d.inode
+ if i.isDir() {
return syserror.EPERM
}
- if d.inode.nlink == 0 {
+ if err := vfs.MayLink(auth.CredentialsFromContext(ctx), linux.FileMode(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil {
+ return err
+ }
+ if i.nlink == 0 {
return syserror.ENOENT
}
- if d.inode.nlink == maxLinks {
+ if i.nlink == maxLinks {
return syserror.EMLINK
}
- d.inode.incLinksLocked()
- d.inode.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent)
- parentDir.insertChildLocked(fs.newDentry(d.inode), name)
+ i.incLinksLocked()
+ i.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent)
+ parentDir.insertChildLocked(fs.newDentry(i), name)
return nil
})
}
@@ -368,6 +372,9 @@ afterTrailingSymlink:
parentDir.inode.touchCMtime()
return fd, nil
}
+ if mustCreate {
+ return nil, syserror.EEXIST
+ }
// Is the file mounted over?
if err := rp.CheckMount(&child.vfsd); err != nil {
return nil, err
@@ -375,7 +382,7 @@ afterTrailingSymlink:
// Do we need to resolve a trailing symlink?
if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
// Symlink traversal updates access time.
- atomic.StoreInt64(&child.inode.atime, child.inode.fs.clock.Now().Nanoseconds())
+ child.inode.touchAtime(rp.Mount())
if err := rp.HandleSymlink(symlink.target); err != nil {
return nil, err
}
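
The new mustCreate check above closes a gap where an O_CREAT|O_EXCL open of an existing tmpfs path would succeed instead of failing. A small sketch of the guest-visible behavior using only the Go standard library; the path is illustrative.

package main

import (
	"fmt"
	"os"
)

func main() {
	// The first exclusive create succeeds.
	f, err := os.OpenFile("/tmp/exclusive-demo", os.O_RDWR|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		fmt.Println("first create failed:", err)
		return
	}
	f.Close()

	// A second exclusive create of the same path must fail with EEXIST, which
	// is what the mustCreate check above now enforces.
	_, err = os.OpenFile("/tmp/exclusive-demo", os.O_RDWR|os.O_CREATE|os.O_EXCL, 0644)
	fmt.Println("second create reports EEXIST:", os.IsExist(err)) // true
}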
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index 77447b32c..b805aadd0 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -279,6 +279,12 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
if offset < 0 {
return 0, syserror.EINVAL
}
+
+ // Check that flags are supported. Silently ignore RWF_HIPRI.
+ if opts.Flags&^linux.RWF_HIPRI != 0 {
+ return 0, syserror.EOPNOTSUPP
+ }
+
if dst.NumBytes() == 0 {
return 0, nil
}
@@ -304,6 +310,12 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
if offset < 0 {
return 0, syserror.EINVAL
}
+
+ // Check that flags are supported. Silently ignore RWF_HIPRI.
+ if opts.Flags&^linux.RWF_HIPRI != 0 {
+ return 0, syserror.EOPNOTSUPP
+ }
+
srclen := src.NumBytes()
if srclen == 0 {
return 0, nil
@@ -360,11 +372,6 @@ func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (
return offset, nil
}
-// Sync implements vfs.FileDescriptionImpl.Sync.
-func (fd *regularFileFD) Sync(ctx context.Context) error {
- return nil
-}
-
// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
file := fd.inode().impl.(*regularFile)
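
The flag checks added to PRead and PWrite above accept RWF_HIPRI (and silently ignore it) while rejecting every other preadv2/pwritev2 flag with EOPNOTSUPP. A sketch of what a guest would observe; it assumes golang.org/x/sys/unix exposes Preadv2 and the RWF_* constants under these names, so treat the exact identifiers as unverified (imports elided).

// probeRWFFlags exercises the new flag checks; fd is an open tmpfs file.
func probeRWFFlags(fd int) {
	buf := make([]byte, 16)
	// RWF_HIPRI is accepted and silently ignored.
	if _, err := unix.Preadv2(fd, [][]byte{buf}, 0, unix.RWF_HIPRI); err != nil {
		log.Printf("RWF_HIPRI read failed unexpectedly: %v", err)
	}
	// Any other flag (RWF_NOWAIT here) is rejected with EOPNOTSUPP.
	if _, err := unix.Preadv2(fd, [][]byte{buf}, 0, unix.RWF_NOWAIT); err == unix.EOPNOTSUPP {
		log.Print("other RWF flags rejected, as expected")
	}
}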
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
index 64e1c40ad..146c7fdfe 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
@@ -138,48 +138,37 @@ func TestLocks(t *testing.T) {
}
defer cleanup()
- var (
- uid1 lock.UniqueID
- uid2 lock.UniqueID
- // Non-blocking.
- block lock.Blocker
- )
-
- uid1 = 123
- uid2 = 456
-
- if err := fd.Impl().LockBSD(ctx, uid1, lock.ReadLock, block); err != nil {
+ uid1 := 123
+ uid2 := 456
+ if err := fd.Impl().LockBSD(ctx, uid1, lock.ReadLock, nil); err != nil {
t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
}
- if err := fd.Impl().LockBSD(ctx, uid2, lock.ReadLock, block); err != nil {
+ if err := fd.Impl().LockBSD(ctx, uid2, lock.ReadLock, nil); err != nil {
t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
}
- if got, want := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, block), syserror.ErrWouldBlock; got != want {
+ if got, want := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, nil), syserror.ErrWouldBlock; got != want {
t.Fatalf("fd.Impl().LockBSD failed: got = %v, want = %v", got, want)
}
if err := fd.Impl().UnlockBSD(ctx, uid1); err != nil {
t.Fatalf("fd.Impl().UnlockBSD failed: err = %v", err)
}
- if err := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, block); err != nil {
+ if err := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, nil); err != nil {
t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
}
- rng1 := lock.LockRange{0, 1}
- rng2 := lock.LockRange{1, 2}
-
- if err := fd.Impl().LockPOSIX(ctx, uid1, lock.ReadLock, rng1, block); err != nil {
+ if err := fd.Impl().LockPOSIX(ctx, uid1, lock.ReadLock, 0, 1, linux.SEEK_SET, nil); err != nil {
t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
}
- if err := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, rng2, block); err != nil {
+ if err := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, 1, 2, linux.SEEK_SET, nil); err != nil {
t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
}
- if err := fd.Impl().LockPOSIX(ctx, uid1, lock.WriteLock, rng1, block); err != nil {
+ if err := fd.Impl().LockPOSIX(ctx, uid1, lock.WriteLock, 0, 1, linux.SEEK_SET, nil); err != nil {
t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
}
- if got, want := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, rng1, block), syserror.ErrWouldBlock; got != want {
+ if got, want := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, 0, 1, linux.SEEK_SET, nil), syserror.ErrWouldBlock; got != want {
t.Fatalf("fd.Impl().LockPOSIX failed: got = %v, want = %v", got, want)
}
- if err := fd.Impl().UnlockPOSIX(ctx, uid1, rng1); err != nil {
+ if err := fd.Impl().UnlockPOSIX(ctx, uid1, 0, 1, linux.SEEK_SET); err != nil {
t.Fatalf("fd.Impl().UnlockPOSIX failed: err = %v", err)
}
}
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 71a7522af..a94333ee0 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -36,11 +36,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/sentry/vfs/memxattr"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
@@ -310,7 +310,7 @@ type inode struct {
ctime int64 // nanoseconds
mtime int64 // nanoseconds
- locks lock.FileLocks
+ locks vfs.FileLocks
// Inotify watches for this inode.
watches vfs.Watches
@@ -761,9 +761,26 @@ func NewMemfd(mount *vfs.Mount, creds *auth.Credentials, allowSeals bool, name s
// Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with
// FMODE_READ | FMODE_WRITE.
var fd regularFileFD
+ fd.Init(&inode.locks)
flags := uint32(linux.O_RDWR)
if err := fd.vfsfd.Init(&fd, flags, mount, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
return nil, err
}
return &fd.vfsfd, nil
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+}
+
+// Sync implements vfs.FileDescriptionImpl.Sync. It does nothing because all
+// filesystem state is in-memory.
+func (*fileDescription) Sync(context.Context) error {
+ return nil
+}
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index 48911240f..4b7d234a4 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -29,6 +29,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// FDFlags define flags for an individual descriptor.
@@ -148,7 +149,12 @@ func (f *FDTable) drop(file *fs.File) {
// dropVFS2 drops the table reference.
func (f *FDTable) dropVFS2(file *vfs.FileDescription) {
- // TODO(gvisor.dev/issue/1480): Release locks.
+ // Release any POSIX lock possibly held by the FDTable. Range {0, 0} means the
+ // entire file.
+ err := file.UnlockPOSIX(context.Background(), f, 0, 0, linux.SEEK_SET)
+ if err != nil && err != syserror.ENOLCK {
+ panic(fmt.Sprintf("UnlockPOSIX failed: %v", err))
+ }
// Generate inotify events.
ev := uint32(linux.IN_CLOSE_NOWRITE)
@@ -157,7 +163,7 @@ func (f *FDTable) dropVFS2(file *vfs.FileDescription) {
}
file.Dentry().InotifyWithParent(ev, 0, vfs.PathEvent)
- // Drop the table reference.
+ // Drop the table's reference.
file.DecRef()
}
diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD
index 0db546b98..449643118 100644
--- a/pkg/sentry/kernel/pipe/BUILD
+++ b/pkg/sentry/kernel/pipe/BUILD
@@ -26,8 +26,8 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index c0e9ee1f4..a4519363f 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -20,8 +20,8 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -63,12 +63,12 @@ func NewVFSPipe(isNamed bool, sizeBytes, atomicIOBytes int64) *VFSPipe {
// Preconditions: statusFlags should not contain an open access mode.
func (vp *VFSPipe) ReaderWriterPair(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription) {
// Connected pipes share the same locks.
- locks := &lock.FileLocks{}
+ locks := &vfs.FileLocks{}
return vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags, locks), vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags, locks)
}
// Open opens the pipe represented by vp.
-func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *lock.FileLocks) (*vfs.FileDescription, error) {
+func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) {
vp.mu.Lock()
defer vp.mu.Unlock()
@@ -130,7 +130,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s
}
// Preconditions: vp.mu must be held.
-func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *lock.FileLocks) *vfs.FileDescription {
+func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) *vfs.FileDescription {
fd := &VFSPipeFD{
pipe: &vp.pipe,
}
@@ -451,3 +451,13 @@ func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFr
}
return n, err
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *VFSPipeFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *VFSPipeFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD
index 60c9896fc..ff81ea6e6 100644
--- a/pkg/sentry/socket/hostinet/BUILD
+++ b/pkg/sentry/socket/hostinet/BUILD
@@ -26,6 +26,7 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/hostfd",
"//pkg/sentry/inet",
@@ -34,7 +35,6 @@ go_library(
"//pkg/sentry/socket",
"//pkg/sentry/socket/control",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/syserr",
"//pkg/syserror",
"//pkg/tcpip/stack",
diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go
index c11e82c10..a92aed2c9 100644
--- a/pkg/sentry/socket/hostinet/socket.go
+++ b/pkg/sentry/socket/hostinet/socket.go
@@ -324,7 +324,7 @@ func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr
return nil, syserr.ErrInvalidArgument
}
- // Whitelist options and constrain option length.
+ // Only allow known and safe options.
optlen := getSockOptLen(t, level, name)
switch level {
case linux.SOL_IP:
@@ -369,7 +369,7 @@ func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr
// SetSockOpt implements socket.Socket.SetSockOpt.
func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error {
- // Whitelist options and constrain option length.
+ // Only allow known and safe options.
optlen := setSockOptLen(t, level, name)
switch level {
case linux.SOL_IP:
@@ -415,7 +415,7 @@ func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt []
// RecvMsg implements socket.Socket.RecvMsg.
func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) {
- // Whitelist flags.
+ // Only allow known and safe flags.
//
// FIXME(jamieliu): We can't support MSG_ERRQUEUE because it uses ancillary
// messages that gvisor/pkg/tcpip/transport/unix doesn't understand. Kill the
@@ -537,7 +537,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
// SendMsg implements socket.Socket.SendMsg.
func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) {
- // Whitelist flags.
+ // Only allow known and safe flags.
if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 {
return 0, syserr.ErrInvalidArgument
}
diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go
index 027add1fd..ad5f64799 100644
--- a/pkg/sentry/socket/hostinet/socket_vfs2.go
+++ b/pkg/sentry/socket/hostinet/socket_vfs2.go
@@ -21,12 +21,12 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/hostfd"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -61,7 +61,7 @@ func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol in
fd: fd,
},
}
- s.LockFD.Init(&lock.FileLocks{})
+ s.LockFD.Init(&vfs.FileLocks{})
if err := fdnotifier.AddFD(int32(fd), &s.queue); err != nil {
return nil, syserr.FromError(err)
}
@@ -134,6 +134,16 @@ func (s *socketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs
return int64(n), err
}
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (s *socketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (s *socketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence)
+}
+
type socketProviderVFS2 struct {
family int
}
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index 66015e2bc..f7abe77d3 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -41,19 +41,6 @@ const errorTargetName = "ERROR"
// change the destination port/destination IP for packets.
const redirectTargetName = "REDIRECT"
-// Metadata is used to verify that we are correctly serializing and
-// deserializing iptables into structs consumable by the iptables tool. We save
-// a metadata struct when the tables are written, and when they are read out we
-// verify that certain fields are the same.
-//
-// metadata is used by this serialization/deserializing code, not netstack.
-type metadata struct {
- HookEntry [linux.NF_INET_NUMHOOKS]uint32
- Underflow [linux.NF_INET_NUMHOOKS]uint32
- NumEntries uint32
- Size uint32
-}
-
// enableLogging controls whether to log the (de)serialization of netfilter
// structs between userspace and netstack. These logs are useful when
// developing iptables, but can pollute sentry logs otherwise.
@@ -83,29 +70,13 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPT
return linux.IPTGetinfo{}, syserr.FromError(err)
}
- // Find the appropriate table.
- table, err := findTable(stack, info.Name)
+ _, info, err := convertNetstackToBinary(stack, info.Name)
if err != nil {
- nflog("%v", err)
+ nflog("couldn't convert iptables: %v", err)
return linux.IPTGetinfo{}, syserr.ErrInvalidArgument
}
- // Get the hooks that apply to this table.
- info.ValidHooks = table.ValidHooks()
-
- // Grab the metadata struct, which is used to store information (e.g.
- // the number of entries) that applies to the user's encoding of
- // iptables, but not netstack's.
- metadata := table.Metadata().(metadata)
-
- // Set values from metadata.
- info.HookEntry = metadata.HookEntry
- info.Underflow = metadata.Underflow
- info.NumEntries = metadata.NumEntries
- info.Size = metadata.Size
-
nflog("returning info: %+v", info)
-
return info, nil
}
@@ -118,23 +89,13 @@ func GetEntries(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen
return linux.KernelIPTGetEntries{}, syserr.FromError(err)
}
- // Find the appropriate table.
- table, err := findTable(stack, userEntries.Name)
- if err != nil {
- nflog("%v", err)
- return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
- }
-
// Convert netstack's iptables rules to something that the iptables
// tool can understand.
- entries, meta, err := convertNetstackToBinary(userEntries.Name.String(), table)
+ entries, _, err := convertNetstackToBinary(stack, userEntries.Name)
if err != nil {
nflog("couldn't read entries: %v", err)
return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
}
- if meta != table.Metadata().(metadata) {
- panic(fmt.Sprintf("Table %q metadata changed between writing and reading. Was saved as %+v, but is now %+v", userEntries.Name.String(), table.Metadata().(metadata), meta))
- }
if binary.Size(entries) > uintptr(outLen) {
nflog("insufficient GetEntries output size: %d", uintptr(outLen))
return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
@@ -143,44 +104,26 @@ func GetEntries(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen
return entries, nil
}
-func findTable(stk *stack.Stack, tablename linux.TableName) (stack.Table, error) {
- table, ok := stk.IPTables().GetTable(tablename.String())
- if !ok {
- return stack.Table{}, fmt.Errorf("couldn't find table %q", tablename)
- }
- return table, nil
-}
-
-// FillIPTablesMetadata populates stack's IPTables with metadata.
-func FillIPTablesMetadata(stk *stack.Stack) {
- stk.IPTables().ModifyTables(func(tables map[string]stack.Table) {
- // In order to fill in the metadata, we have to translate ipt from its
- // netstack format to Linux's giant-binary-blob format.
- for name, table := range tables {
- _, metadata, err := convertNetstackToBinary(name, table)
- if err != nil {
- panic(fmt.Errorf("Unable to set default IP tables: %v", err))
- }
- table.SetMetadata(metadata)
- tables[name] = table
- }
- })
-}
-
// convertNetstackToBinary converts the iptables as stored in netstack to the
// format expected by the iptables tool. Linux stores each table as a binary
// blob that can only be traversed by parsing a bit, reading some offsets,
// jumping to those offsets, parsing again, etc.
-func convertNetstackToBinary(tablename string, table stack.Table) (linux.KernelIPTGetEntries, metadata, error) {
- // Return values.
+func convertNetstackToBinary(stack *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) {
+ table, ok := stack.IPTables().GetTable(tablename.String())
+ if !ok {
+ return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename)
+ }
+
var entries linux.KernelIPTGetEntries
- var meta metadata
+ var info linux.IPTGetinfo
+ info.ValidHooks = table.ValidHooks()
// The table name has to fit in the struct.
if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
- return linux.KernelIPTGetEntries{}, metadata{}, fmt.Errorf("table name %q too long.", tablename)
+ return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename)
}
- copy(entries.Name[:], tablename)
+ copy(info.Name[:], tablename[:])
+ copy(entries.Name[:], tablename[:])
for ruleIdx, rule := range table.Rules {
nflog("convert to binary: current offset: %d", entries.Size)
@@ -189,14 +132,14 @@ func convertNetstackToBinary(tablename string, table stack.Table) (linux.KernelI
for hook, hookRuleIdx := range table.BuiltinChains {
if hookRuleIdx == ruleIdx {
nflog("convert to binary: found hook %d at offset %d", hook, entries.Size)
- meta.HookEntry[hook] = entries.Size
+ info.HookEntry[hook] = entries.Size
}
}
// Is this a chain underflow point?
for underflow, underflowRuleIdx := range table.Underflows {
if underflowRuleIdx == ruleIdx {
nflog("convert to binary: found underflow %d at offset %d", underflow, entries.Size)
- meta.Underflow[underflow] = entries.Size
+ info.Underflow[underflow] = entries.Size
}
}
@@ -251,12 +194,12 @@ func convertNetstackToBinary(tablename string, table stack.Table) (linux.KernelI
entries.Size += uint32(entry.NextOffset)
entries.Entrytable = append(entries.Entrytable, entry)
- meta.NumEntries++
+ info.NumEntries++
}
- nflog("convert to binary: finished with an marshalled size of %d", meta.Size)
- meta.Size = entries.Size
- return entries, meta, nil
+ nflog("convert to binary: finished with an marshalled size of %d", info.Size)
+ info.Size = entries.Size
+ return entries, info, nil
}
func marshalTarget(target stack.Target) []byte {
@@ -569,12 +512,6 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
// - There are no chains without an unconditional final rule.
// - There are no chains without an unconditional underflow rule.
- table.SetMetadata(metadata{
- HookEntry: replace.HookEntry,
- Underflow: replace.Underflow,
- NumEntries: replace.NumEntries,
- Size: replace.Size,
- })
stk.IPTables().ReplaceTable(replace.Name.String(), table)
return nil
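
Folding the metadata bookkeeping into convertNetstackToBinary lets GetInfo and GetEntries derive their userspace-facing structs from the netstack table in a single pass instead of trusting a cached copy. A sketch of the new call shape, where getTableViews is a hypothetical helper and the types and calls come from this change (imports elided):

func getTableViews(stk *stack.Stack, name linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, *syserr.Error) {
	// One traversal of the netstack table produces both userspace views:
	// info feeds IPT_SO_GET_INFO, entries feeds IPT_SO_GET_ENTRIES.
	entries, info, err := convertNetstackToBinary(stk, name)
	if err != nil {
		nflog("couldn't convert iptables: %v", err)
		return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, syserr.ErrInvalidArgument
	}
	return entries, info, nil
}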
diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD
index 420e573c9..d5ca3ac56 100644
--- a/pkg/sentry/socket/netlink/BUILD
+++ b/pkg/sentry/socket/netlink/BUILD
@@ -20,6 +20,7 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
@@ -29,7 +30,6 @@ go_library(
"//pkg/sentry/socket/unix",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/sync",
"//pkg/syserr",
"//pkg/syserror",
diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go
index 8bfee5193..dbcd8b49a 100644
--- a/pkg/sentry/socket/netlink/socket_vfs2.go
+++ b/pkg/sentry/socket/netlink/socket_vfs2.go
@@ -18,12 +18,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/unix"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -78,7 +78,7 @@ func NewVFS2(t *kernel.Task, skType linux.SockType, protocol Protocol) (*SocketV
sendBufferSize: defaultSendBufferSize,
},
}
- fd.LockFD.Init(&lock.FileLocks{})
+ fd.LockFD.Init(&vfs.FileLocks{})
return fd, nil
}
@@ -140,3 +140,13 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs
n, err := s.sendMsg(ctx, src, nil, 0, socket.ControlMessages{})
return int64(n), err.ToError()
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD
index 0f592ecc3..ea6ebd0e2 100644
--- a/pkg/sentry/socket/netstack/BUILD
+++ b/pkg/sentry/socket/netstack/BUILD
@@ -28,6 +28,7 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
@@ -37,7 +38,6 @@ go_library(
"//pkg/sentry/socket/netfilter",
"//pkg/sentry/unimpl",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/sync",
"//pkg/syserr",
"//pkg/syserror",
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 738277391..c0b63a803 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -191,6 +191,7 @@ var Metrics = tcpip.Stats{
MalformedPacketsReceived: mustCreateMetric("/netstack/udp/malformed_packets_received", "Number of incoming UDP datagrams dropped due to the UDP header being in a malformed state."),
PacketsSent: mustCreateMetric("/netstack/udp/packets_sent", "Number of UDP datagrams sent."),
PacketSendErrors: mustCreateMetric("/netstack/udp/packet_send_errors", "Number of UDP datagrams failed to be sent."),
+ ChecksumErrors: mustCreateMetric("/netstack/udp/checksum_errors", "Number of UDP datagrams dropped due to bad checksums."),
},
}
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index 1412a4810..d65a89316 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -19,13 +19,13 @@ import (
"gvisor.dev/gvisor/pkg/amutex"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/netfilter"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -66,7 +66,7 @@ func NewVFS2(t *kernel.Task, family int, skType linux.SockType, protocol int, qu
protocol: protocol,
},
}
- s.LockFD.Init(&lock.FileLocks{})
+ s.LockFD.Init(&vfs.FileLocks{})
vfsfd := &s.vfsfd
if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{
DenyPRead: true,
@@ -318,3 +318,13 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by
return SetSockOpt(t, s, s.Endpoint, level, name, optVal)
}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go
index 9b44c2b89..d2fb655ea 100644
--- a/pkg/sentry/socket/netstack/stack.go
+++ b/pkg/sentry/socket/netstack/stack.go
@@ -18,7 +18,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/inet"
- "gvisor.dev/gvisor/pkg/sentry/socket/netfilter"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -144,7 +143,7 @@ func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error {
// TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize.
func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) {
- var rs tcp.ReceiveBufferSizeOption
+ var rs tcpip.StackReceiveBufferSizeOption
err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &rs)
return inet.TCPBufferSize{
Min: rs.Min,
@@ -155,7 +154,7 @@ func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) {
// SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize.
func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error {
- rs := tcp.ReceiveBufferSizeOption{
+ rs := tcpip.StackReceiveBufferSizeOption{
Min: size.Min,
Default: size.Default,
Max: size.Max,
@@ -165,7 +164,7 @@ func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error {
// TCPSendBufferSize implements inet.Stack.TCPSendBufferSize.
func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) {
- var ss tcp.SendBufferSizeOption
+ var ss tcpip.StackSendBufferSizeOption
err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &ss)
return inet.TCPBufferSize{
Min: ss.Min,
@@ -176,7 +175,7 @@ func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) {
// SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize.
func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error {
- ss := tcp.SendBufferSizeOption{
+ ss := tcpip.StackSendBufferSizeOption{
Min: size.Min,
Default: size.Default,
Max: size.Max,
@@ -186,14 +185,14 @@ func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error {
// TCPSACKEnabled implements inet.Stack.TCPSACKEnabled.
func (s *Stack) TCPSACKEnabled() (bool, error) {
- var sack tcp.SACKEnabled
+ var sack tcpip.StackSACKEnabled
err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &sack)
return bool(sack), syserr.TranslateNetstackError(err).ToError()
}
// SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled.
func (s *Stack) SetTCPSACKEnabled(enabled bool) error {
- return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enabled))).ToError()
+ return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(enabled))).ToError()
}
// Statistics implements inet.Stack.Statistics.
@@ -314,7 +313,7 @@ func (s *Stack) Statistics(stat interface{}, arg string) error {
udp.PacketsSent.Value(), // OutDatagrams.
udp.ReceiveBufferErrors.Value(), // RcvbufErrors.
0, // Udp/SndbufErrors.
- 0, // Udp/InCsumErrors.
+ udp.ChecksumErrors.Value(), // Udp/InCsumErrors.
0, // Udp/IgnoredMulti.
}
default:
@@ -366,11 +365,6 @@ func (s *Stack) IPTables() (*stack.IPTables, error) {
return s.Stack.IPTables(), nil
}
-// FillIPTablesMetadata populates stack's IPTables with metadata.
-func (s *Stack) FillIPTablesMetadata() {
- netfilter.FillIPTablesMetadata(s.Stack)
-}
-
// Resume implements inet.Stack.Resume.
func (s *Stack) Resume() {
s.Stack.Resume()
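
The TCP tunables above now use the shared tcpip.Stack* option types rather than tcp-package-local ones, but they are still applied through SetTransportProtocolOption. A brief sketch of that pattern; tuneTCP and the numeric buffer sizes are illustrative, and imports are elided.

func tuneTCP(s *stack.Stack) *tcpip.Error {
	// Enable SACK using the shared option type this change switches to.
	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(true)); err != nil {
		return err
	}
	// Set TCP send buffer limits; Min/Default/Max are in bytes.
	sndBuf := tcpip.StackSendBufferSizeOption{Min: 4096, Default: 212992, Max: 4 << 20}
	return s.SetTransportProtocolOption(tcp.ProtocolNumber, sndBuf)
}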
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index 7d4cc80fe..cca5e70f1 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -21,6 +21,7 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/time",
@@ -29,7 +30,6 @@ go_library(
"//pkg/sentry/socket/netstack",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
- "//pkg/sentry/vfs/lock",
"//pkg/syserr",
"//pkg/syserror",
"//pkg/tcpip",
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index 09c6d3b27..a1e49cc57 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -476,6 +476,9 @@ func (e *connectionedEndpoint) Readiness(mask waiter.EventMask) waiter.EventMask
// State implements socket.Socket.State.
func (e *connectionedEndpoint) State() uint32 {
+ e.Lock()
+ defer e.Unlock()
+
if e.Connected() {
return linux.SS_CONNECTED
}
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index 8c32371a2..ff2149250 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
@@ -26,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/netstack"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -53,7 +53,7 @@ func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType)
mnt := t.Kernel().SocketMount()
d := sockfs.NewDentry(t.Credentials(), mnt)
- fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &lock.FileLocks{})
+ fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &vfs.FileLocks{})
if err != nil {
return nil, syserr.FromError(err)
}
@@ -62,7 +62,7 @@ func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType)
// NewFileDescription creates and returns a socket file description
// corresponding to the given mount and dentry.
-func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry, locks *lock.FileLocks) (*vfs.FileDescription, error) {
+func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry, locks *vfs.FileLocks) (*vfs.FileDescription, error) {
// You can create AF_UNIX, SOCK_RAW sockets. They're the same as
// SOCK_DGRAM and don't require CAP_NET_RAW.
if stype == linux.SOCK_RAW {
@@ -300,6 +300,16 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by
return netstack.SetSockOpt(t, s, s.ep, level, name, optVal)
}
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence)
+}
+
// providerVFS2 is a unix domain socket provider for VFS2.
type providerVFS2 struct{}
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index 696e1c8d3..35eba20c5 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -1111,17 +1111,6 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
}
-// LINT.ThenChange(vfs2/fd.go)
-
-const (
- _FADV_NORMAL = 0
- _FADV_RANDOM = 1
- _FADV_SEQUENTIAL = 2
- _FADV_WILLNEED = 3
- _FADV_DONTNEED = 4
- _FADV_NOREUSE = 5
-)
-
// Fadvise64 implements linux syscall fadvise64(2).
// This implementation currently ignores the provided advice.
func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
@@ -1146,12 +1135,12 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
switch advice {
- case _FADV_NORMAL:
- case _FADV_RANDOM:
- case _FADV_SEQUENTIAL:
- case _FADV_WILLNEED:
- case _FADV_DONTNEED:
- case _FADV_NOREUSE:
+ case linux.POSIX_FADV_NORMAL:
+ case linux.POSIX_FADV_RANDOM:
+ case linux.POSIX_FADV_SEQUENTIAL:
+ case linux.POSIX_FADV_WILLNEED:
+ case linux.POSIX_FADV_DONTNEED:
+ case linux.POSIX_FADV_NOREUSE:
default:
return 0, nil, syserror.EINVAL
}
@@ -1160,8 +1149,6 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, nil
}
-// LINT.IfChange
-
func mkdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error {
path, _, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go
index f9ccb303c..e68b20bed 100644
--- a/pkg/sentry/syscalls/linux/vfs2/fd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/fd.go
@@ -17,10 +17,12 @@ package vfs2
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -167,8 +169,82 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
err := tmpfs.AddSeals(file, args[2].Uint())
return 0, nil, err
+ case linux.F_SETLK, linux.F_SETLKW:
+ return 0, nil, posixLock(t, args, file, cmd)
default:
// TODO(gvisor.dev/issue/2920): Everything else is not yet supported.
return 0, nil, syserror.EINVAL
}
}
+
+func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, cmd int32) error {
+ // Copy in the lock request.
+ flockAddr := args[2].Pointer()
+ var flock linux.Flock
+ if _, err := t.CopyIn(flockAddr, &flock); err != nil {
+ return err
+ }
+
+ var blocker lock.Blocker
+ if cmd == linux.F_SETLKW {
+ blocker = t
+ }
+
+ switch flock.Type {
+ case linux.F_RDLCK:
+ if !file.IsReadable() {
+ return syserror.EBADF
+ }
+ return file.LockPOSIX(t, t.FDTable(), lock.ReadLock, uint64(flock.Start), uint64(flock.Len), flock.Whence, blocker)
+
+ case linux.F_WRLCK:
+ if !file.IsWritable() {
+ return syserror.EBADF
+ }
+ return file.LockPOSIX(t, t.FDTable(), lock.WriteLock, uint64(flock.Start), uint64(flock.Len), flock.Whence, blocker)
+
+ case linux.F_UNLCK:
+ return file.UnlockPOSIX(t, t.FDTable(), uint64(flock.Start), uint64(flock.Len), flock.Whence)
+
+ default:
+ return syserror.EINVAL
+ }
+}
+
+// Fadvise64 implements fadvise64(2).
+// This implementation currently ignores the provided advice.
+func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ length := args[2].Int64()
+ advice := args[3].Int()
+
+ // Note: offset is allowed to be negative.
+ if length < 0 {
+ return 0, nil, syserror.EINVAL
+ }
+
+ file := t.GetFileVFS2(fd)
+ if file == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer file.DecRef()
+
+ // If the FD refers to a pipe or FIFO, return error.
+ if _, isPipe := file.Impl().(*pipe.VFSPipeFD); isPipe {
+ return 0, nil, syserror.ESPIPE
+ }
+
+ switch advice {
+ case linux.POSIX_FADV_NORMAL:
+ case linux.POSIX_FADV_RANDOM:
+ case linux.POSIX_FADV_SEQUENTIAL:
+ case linux.POSIX_FADV_WILLNEED:
+ case linux.POSIX_FADV_DONTNEED:
+ case linux.POSIX_FADV_NOREUSE:
+ default:
+ return 0, nil, syserror.EINVAL
+ }
+
+ // Sure, whatever.
+ return 0, nil, nil
+}
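
posixLock above maps fcntl(2)'s F_SETLK and F_SETLKW onto the new whence-based LockPOSIX API, passing the task as the blocker only for F_SETLKW. From the guest side this is ordinary POSIX record locking; a sketch using golang.org/x/sys/unix, assuming the usual FcntlFlock helper and Flock_t layout (verify the exact names before relying on them; imports elided).

// lockFirst100Bytes takes a write lock on bytes [0, 100) of f, waiting for any
// conflicting lock to be released (the F_SETLKW path above).
func lockFirst100Bytes(f *os.File) error {
	lk := unix.Flock_t{
		Type:   unix.F_WRLCK,
		Whence: unix.SEEK_SET,
		Start:  0,
		Len:    100,
	}
	return unix.FcntlFlock(f.Fd(), unix.F_SETLKW, &lk)
}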
diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go
index 46d3e189c..5dac77e4d 100644
--- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go
+++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go
@@ -313,6 +313,9 @@ func symlinkat(t *kernel.Task, targetAddr usermem.Addr, newdirfd int32, linkpath
if err != nil {
return err
}
+ if len(target) == 0 {
+ return syserror.ENOENT
+ }
linkpath, err := copyInPath(t, linkpathAddr)
if err != nil {
return err
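
The added length check makes symlinkat reject an empty target with ENOENT, matching Linux. A small sketch of the guest-visible behavior using the standard library; the path is illustrative and imports are elided.

func checkEmptySymlinkTarget() {
	// symlink(2) with an empty target must fail with ENOENT, which is what
	// the new len(target) == 0 check enforces.
	err := os.Symlink("", "/tmp/dangling-example")
	var errno syscall.Errno
	if errors.As(err, &errno) && errno == syscall.ENOENT {
		fmt.Println("empty symlink target rejected with ENOENT")
	}
}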
diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
index caa6a98ff..b463edf2a 100644
--- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go
+++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
@@ -108,7 +108,7 @@ func Override() {
s.Table[209] = syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"})
s.Table[213] = syscalls.Supported("epoll_create", EpollCreate)
s.Table[217] = syscalls.Supported("getdents64", Getdents64)
- delete(s.Table, 221) // fdavise64
+ s.Table[221] = syscalls.PartiallySupported("fadvise64", Fadvise64, "The syscall is 'supported', but ignores all provided advice.", nil)
s.Table[232] = syscalls.Supported("epoll_wait", EpollWait)
s.Table[233] = syscalls.Supported("epoll_ctl", EpollCtl)
s.Table[235] = syscalls.Supported("utimes", Utimes)
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index 16d9f3a28..642769e7c 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -44,6 +44,7 @@ go_library(
"filesystem_impl_util.go",
"filesystem_type.go",
"inotify.go",
+ "lock.go",
"mount.go",
"mount_unsafe.go",
"options.go",
@@ -72,7 +73,6 @@ go_library(
"//pkg/sentry/memmap",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/uniqueid",
- "//pkg/sentry/vfs/lock",
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md
index 66f3105bd..4b9faf2ea 100644
--- a/pkg/sentry/vfs/README.md
+++ b/pkg/sentry/vfs/README.md
@@ -169,8 +169,6 @@ This construction, which is essentially a type-safe analogue to Linux's
- binder, which is similarly far too incomplete to use.
- - whitelistfs, which we are already actively attempting to remove.
-
- Save/restore. For instance, it is unclear if the current implementation of
the `state` package supports the inheritance pattern described above.
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 13c48824e..e0538ea53 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -438,14 +438,10 @@ type FileDescriptionImpl interface {
UnlockBSD(ctx context.Context, uid lock.UniqueID) error
// LockPOSIX tries to acquire a POSIX-style advisory file lock.
- //
- // TODO(gvisor.dev/issue/1480): POSIX-style file locking
- LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error
+ LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, start, length uint64, whence int16, block lock.Blocker) error
// UnlockPOSIX releases a POSIX-style advisory file lock.
- //
- // TODO(gvisor.dev/issue/1480): POSIX-style file locking
- UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error
+ UnlockPOSIX(ctx context.Context, uid lock.UniqueID, start, length uint64, whence int16) error
}
// Dirent holds the information contained in struct linux_dirent64.
@@ -764,3 +760,13 @@ func (fd *FileDescription) LockBSD(ctx context.Context, lockType lock.LockType,
func (fd *FileDescription) UnlockBSD(ctx context.Context) error {
return fd.impl.UnlockBSD(ctx, fd)
}
+
+// LockPOSIX locks a POSIX-style file range lock.
+func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, start, end uint64, whence int16, block lock.Blocker) error {
+ return fd.impl.LockPOSIX(ctx, uid, t, start, end, whence, block)
+}
+
+// UnlockPOSIX unlocks a POSIX-style file range lock.
+func (fd *FileDescription) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, start, end uint64, whence int16) error {
+ return fd.impl.UnlockPOSIX(ctx, uid, start, end, whence)
+}
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index af7213dfd..1e66997ce 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -369,14 +368,19 @@ func GenericConfigureMMap(fd *FileDescription, m memmap.Mappable, opts *memmap.M
// LockFD may be used by most implementations of FileDescriptionImpl.Lock*
// functions. Caller must call Init().
type LockFD struct {
- locks *lock.FileLocks
+ locks *FileLocks
}
// Init initializes fd with FileLocks to use.
-func (fd *LockFD) Init(locks *lock.FileLocks) {
+func (fd *LockFD) Init(locks *FileLocks) {
fd.locks = locks
}
+// Locks returns the locks associated with this file.
+func (fd *LockFD) Locks() *FileLocks {
+ return fd.locks
+}
+
// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
return fd.locks.LockBSD(uid, t, block)
@@ -388,17 +392,6 @@ func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
return nil
}
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *LockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error {
- return fd.locks.LockPOSIX(uid, t, rng, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *LockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, rng fslock.LockRange) error {
- fd.locks.UnlockPOSIX(uid, rng)
- return nil
-}
-
// NoLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface
// returning ENOLCK.
type NoLockFD struct{}
@@ -414,11 +407,11 @@ func (NoLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error {
+func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
return syserror.ENOLCK
}
// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, rng fslock.LockRange) error {
+func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
return syserror.ENOLCK
}
diff --git a/pkg/sentry/vfs/lock/lock.go b/pkg/sentry/vfs/lock.go
index 724dfe743..6c7583a81 100644
--- a/pkg/sentry/vfs/lock/lock.go
+++ b/pkg/sentry/vfs/lock.go
@@ -17,9 +17,11 @@
//
// The actual implementations can be found in the lock package under
// sentry/fs/lock.
-package lock
+package vfs
import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -56,7 +58,11 @@ func (fl *FileLocks) UnlockBSD(uid fslock.UniqueID) {
}
// LockPOSIX tries to acquire a POSIX-style lock on a file region.
-func (fl *FileLocks) LockPOSIX(uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error {
+func (fl *FileLocks) LockPOSIX(ctx context.Context, fd *FileDescription, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ rng, err := computeRange(ctx, fd, start, length, whence)
+ if err != nil {
+ return err
+ }
if fl.posix.LockRegion(uid, t, rng, block) {
return nil
}
@@ -67,6 +73,37 @@ func (fl *FileLocks) LockPOSIX(uid fslock.UniqueID, t fslock.LockType, rng fsloc
//
// This operation is always successful, even if there did not exist a lock on
// the requested region held by uid in the first place.
-func (fl *FileLocks) UnlockPOSIX(uid fslock.UniqueID, rng fslock.LockRange) {
+func (fl *FileLocks) UnlockPOSIX(ctx context.Context, fd *FileDescription, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ rng, err := computeRange(ctx, fd, start, length, whence)
+ if err != nil {
+ return err
+ }
fl.posix.UnlockRegion(uid, rng)
+ return nil
+}
+
+func computeRange(ctx context.Context, fd *FileDescription, start uint64, length uint64, whence int16) (fslock.LockRange, error) {
+ var off int64
+ switch whence {
+ case linux.SEEK_SET:
+ off = 0
+ case linux.SEEK_CUR:
+ // Note that Linux does not hold any mutexes while retrieving the file
+ // offset, see fs/locks.c:flock_to_posix_lock and fs/locks.c:fcntl_setlk.
+ curOff, err := fd.Seek(ctx, 0, linux.SEEK_CUR)
+ if err != nil {
+ return fslock.LockRange{}, err
+ }
+ off = curOff
+ case linux.SEEK_END:
+ stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_SIZE})
+ if err != nil {
+ return fslock.LockRange{}, err
+ }
+ off = int64(stat.Size)
+ default:
+ return fslock.LockRange{}, syserror.EINVAL
+ }
+
+ return fslock.ComputeRange(int64(start), int64(length), off)
}
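
computeRange resolves the (start, length, whence) triple from struct flock into an absolute fslock.LockRange before the lock tables are touched: SEEK_CUR consults the FD's current offset, SEEK_END its size, and a zero length extends the range to end of file. A worked example of the SEEK_CUR case; the numbers are illustrative and imports are elided.

func exampleRange() {
	// Suppose the file offset is 100 and the caller passed start=10,
	// length=20, whence=SEEK_CUR. computeRange resolves off=100 and calls:
	rng, err := fslock.ComputeRange(10, 20, 100)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", rng) // the range covers bytes [110, 130)
	// With length == 0 the range would instead run from byte 110 to end of file.
}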
diff --git a/pkg/sentry/vfs/lock/BUILD b/pkg/sentry/vfs/lock/BUILD
deleted file mode 100644
index d9ab063b7..000000000
--- a/pkg/sentry/vfs/lock/BUILD
+++ /dev/null
@@ -1,13 +0,0 @@
-load("//tools:defs.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "lock",
- srcs = ["lock.go"],
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/sentry/fs/lock",
- "//pkg/syserror",
- ],
-)
diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go
index f9647f90e..afe2be8d7 100644
--- a/pkg/sentry/vfs/permissions.go
+++ b/pkg/sentry/vfs/permissions.go
@@ -94,6 +94,37 @@ func GenericCheckPermissions(creds *auth.Credentials, ats AccessTypes, mode linu
return syserror.EACCES
}
+// MayLink determines whether creating a hard link to a file with the given
+// mode, kuid, and kgid is permitted.
+//
+// This corresponds to Linux's fs/namei.c:may_linkat.
+func MayLink(creds *auth.Credentials, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) error {
+ // Source inode owner can hardlink all they like; otherwise, it must be a
+ // safe source.
+ if CanActAsOwner(creds, kuid) {
+ return nil
+ }
+
+ // Only regular files can be hard linked.
+ if mode.FileType() != linux.S_IFREG {
+ return syserror.EPERM
+ }
+
+ // Setuid files should not get pinned to the filesystem.
+ if mode&linux.S_ISUID != 0 {
+ return syserror.EPERM
+ }
+
+ // Executable setgid files should not get pinned to the filesystem, but we
+ // don't support S_IXGRP anyway.
+
+ // Hardlinking to unreadable or unwritable sources is dangerous.
+ if err := GenericCheckPermissions(creds, MayRead|MayWrite, mode, kuid, kgid); err != nil {
+ return syserror.EPERM
+ }
+ return nil
+}
+
// AccessTypesForOpenFlags returns the access types required to open a file
// with the given OpenOptions.Flags. Note that this is NOT the same thing as
// the set of accesses permitted for the opened file:
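
MayLink mirrors Linux's protected-hardlinks policy from fs/namei.c:may_linkat. As a rough illustration of the decision order only — the constants and helpers below are hypothetical stand-ins, not the auth/linux APIs used above:

package main

import (
	"errors"
	"fmt"
)

// Stand-in mode bits; the real code uses the linux package's FileMode helpers.
const (
	sIFMT  = 0170000 // file type mask
	sIFREG = 0100000 // regular file
	sISUID = 0004000 // setuid bit
)

var errPerm = errors.New("EPERM")

// mayLink sketches the may_linkat ordering: owners may always link; everyone
// else may only link regular, non-setuid files they could open for read/write.
func mayLink(isOwner bool, mode uint32, canReadWrite bool) error {
	if isOwner {
		return nil
	}
	if mode&sIFMT != sIFREG {
		return errPerm
	}
	if mode&sISUID != 0 {
		return errPerm
	}
	if !canReadWrite {
		return errPerm
	}
	return nil
}

func main() {
	fmt.Println(mayLink(false, sIFREG|0644, true))   // <nil>: allowed
	fmt.Println(mayLink(false, sIFREG|sISUID, true)) // EPERM: setuid source
}
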
diff --git a/pkg/tcpip/link/nested/BUILD b/pkg/tcpip/link/nested/BUILD
new file mode 100644
index 000000000..bdd5276ad
--- /dev/null
+++ b/pkg/tcpip/link/nested/BUILD
@@ -0,0 +1,31 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "nested",
+ srcs = [
+ "nested.go",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//pkg/sync",
+ "//pkg/tcpip",
+ "//pkg/tcpip/buffer",
+ "//pkg/tcpip/stack",
+ ],
+)
+
+go_test(
+ name = "nested_test",
+ size = "small",
+ srcs = [
+ "nested_test.go",
+ ],
+ deps = [
+ "//pkg/tcpip",
+ "//pkg/tcpip/header",
+ "//pkg/tcpip/link/nested",
+ "//pkg/tcpip/stack",
+ ],
+)
diff --git a/pkg/tcpip/link/nested/nested.go b/pkg/tcpip/link/nested/nested.go
new file mode 100644
index 000000000..2998f9c4f
--- /dev/null
+++ b/pkg/tcpip/link/nested/nested.go
@@ -0,0 +1,131 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package nested provides helpers to implement the pattern of nested
+// stack.LinkEndpoints.
+package nested
+
+import (
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+)
+
+// Endpoint is a wrapper around stack.LinkEndpoint and stack.NetworkDispatcher
+// that can be used to implement nesting safely by providing lifecycle
+// concurrency guards.
+//
+// See the tests in this package for example usage.
+type Endpoint struct {
+ child stack.LinkEndpoint
+ embedder stack.NetworkDispatcher
+
+ // mu protects dispatcher.
+ mu sync.RWMutex
+ dispatcher stack.NetworkDispatcher
+}
+
+var _ stack.GSOEndpoint = (*Endpoint)(nil)
+var _ stack.LinkEndpoint = (*Endpoint)(nil)
+var _ stack.NetworkDispatcher = (*Endpoint)(nil)
+
+// Init initializes a nested.Endpoint that uses embedder as the dispatcher for
+// child on Attach.
+//
+// See the tests in this package for example usage.
+func (e *Endpoint) Init(child stack.LinkEndpoint, embedder stack.NetworkDispatcher) {
+ e.child = child
+ e.embedder = embedder
+}
+
+// DeliverNetworkPacket implements stack.NetworkDispatcher.
+func (e *Endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+ e.mu.RLock()
+ d := e.dispatcher
+ e.mu.RUnlock()
+ if d != nil {
+ d.DeliverNetworkPacket(remote, local, protocol, pkt)
+ }
+}
+
+// Attach implements stack.LinkEndpoint.
+func (e *Endpoint) Attach(dispatcher stack.NetworkDispatcher) {
+ e.mu.Lock()
+ e.dispatcher = dispatcher
+ e.mu.Unlock()
+ // If we're attaching to a valid dispatcher, pass embedder as the dispatcher
+ // to our child, otherwise detach the child by giving it a nil dispatcher.
+ var pass stack.NetworkDispatcher
+ if dispatcher != nil {
+ pass = e.embedder
+ }
+ e.child.Attach(pass)
+}
+
+// IsAttached implements stack.LinkEndpoint.
+func (e *Endpoint) IsAttached() bool {
+ e.mu.RLock()
+ isAttached := e.dispatcher != nil
+ e.mu.RUnlock()
+ return isAttached
+}
+
+// MTU implements stack.LinkEndpoint.
+func (e *Endpoint) MTU() uint32 {
+ return e.child.MTU()
+}
+
+// Capabilities implements stack.LinkEndpoint.
+func (e *Endpoint) Capabilities() stack.LinkEndpointCapabilities {
+ return e.child.Capabilities()
+}
+
+// MaxHeaderLength implements stack.LinkEndpoint.
+func (e *Endpoint) MaxHeaderLength() uint16 {
+ return e.child.MaxHeaderLength()
+}
+
+// LinkAddress implements stack.LinkEndpoint.
+func (e *Endpoint) LinkAddress() tcpip.LinkAddress {
+ return e.child.LinkAddress()
+}
+
+// WritePacket implements stack.LinkEndpoint.
+func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
+ return e.child.WritePacket(r, gso, protocol, pkt)
+}
+
+// WritePackets implements stack.LinkEndpoint.
+func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+ return e.child.WritePackets(r, gso, pkts, protocol)
+}
+
+// WriteRawPacket implements stack.LinkEndpoint.
+func (e *Endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
+ return e.child.WriteRawPacket(vv)
+}
+
+// Wait implements stack.LinkEndpoint.
+func (e *Endpoint) Wait() {
+ e.child.Wait()
+}
+
+// GSOMaxSize implements stack.GSOEndpoint.
+func (e *Endpoint) GSOMaxSize() uint32 {
+ if e, ok := e.child.(stack.GSOEndpoint); ok {
+ return e.GSOMaxSize()
+ }
+ return 0
+}
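
The intended pattern is for a wrapping endpoint to embed nested.Endpoint, call Init with itself as the embedder, and override only the hooks it cares about; the test below and the sniffer refactor later in this change both follow that shape. A hypothetical wrapper (countingEndpoint is illustrative, not part of this change) might look like:

package counting

import (
	"sync/atomic"

	"gvisor.dev/gvisor/pkg/tcpip"
	"gvisor.dev/gvisor/pkg/tcpip/link/nested"
	"gvisor.dev/gvisor/pkg/tcpip/stack"
)

// countingEndpoint counts inbound packets and defers everything else to the
// child through the embedded nested.Endpoint.
type countingEndpoint struct {
	nested.Endpoint
	inbound uint64
}

// New wraps lower; the wrapper itself is installed as the child's dispatcher
// once the wrapper is attached.
func New(lower stack.LinkEndpoint) stack.LinkEndpoint {
	e := &countingEndpoint{}
	e.Endpoint.Init(lower, e)
	return e
}

// DeliverNetworkPacket observes inbound packets, then forwards them to the
// dispatcher registered on the nested.Endpoint.
func (e *countingEndpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
	atomic.AddUint64(&e.inbound, 1)
	e.Endpoint.DeliverNetworkPacket(remote, local, protocol, pkt)
}
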
diff --git a/pkg/tcpip/link/nested/nested_test.go b/pkg/tcpip/link/nested/nested_test.go
new file mode 100644
index 000000000..c1a219f02
--- /dev/null
+++ b/pkg/tcpip/link/nested/nested_test.go
@@ -0,0 +1,105 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package nested_test
+
+import (
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/nested"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+)
+
+type parentEndpoint struct {
+ nested.Endpoint
+}
+
+var _ stack.LinkEndpoint = (*parentEndpoint)(nil)
+var _ stack.NetworkDispatcher = (*parentEndpoint)(nil)
+
+type childEndpoint struct {
+ stack.LinkEndpoint
+ dispatcher stack.NetworkDispatcher
+}
+
+var _ stack.LinkEndpoint = (*childEndpoint)(nil)
+
+func (c *childEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
+ c.dispatcher = dispatcher
+}
+
+func (c *childEndpoint) IsAttached() bool {
+ return c.dispatcher != nil
+}
+
+type counterDispatcher struct {
+ count int
+}
+
+var _ stack.NetworkDispatcher = (*counterDispatcher)(nil)
+
+func (d *counterDispatcher) DeliverNetworkPacket(tcpip.LinkAddress, tcpip.LinkAddress, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) {
+ d.count++
+}
+
+func TestNestedLinkEndpoint(t *testing.T) {
+ const emptyAddress = tcpip.LinkAddress("")
+
+ var (
+ childEP childEndpoint
+ nestedEP parentEndpoint
+ disp counterDispatcher
+ )
+ nestedEP.Endpoint.Init(&childEP, &nestedEP)
+
+ if childEP.IsAttached() {
+ t.Error("On init, childEP.IsAttached() = true, want = false")
+ }
+ if nestedEP.IsAttached() {
+ t.Error("On init, nestedEP.IsAttached() = true, want = false")
+ }
+
+ nestedEP.Attach(&disp)
+ if disp.count != 0 {
+ t.Fatalf("After attach, got disp.count = %d, want = 0", disp.count)
+ }
+ if !childEP.IsAttached() {
+ t.Error("After attach, childEP.IsAttached() = false, want = true")
+ }
+ if !nestedEP.IsAttached() {
+ t.Error("After attach, nestedEP.IsAttached() = false, want = true")
+ }
+
+ nestedEP.DeliverNetworkPacket(emptyAddress, emptyAddress, header.IPv4ProtocolNumber, &stack.PacketBuffer{})
+ if disp.count != 1 {
+ t.Errorf("After first packet with dispatcher attached, got disp.count = %d, want = 1", disp.count)
+ }
+
+ nestedEP.Attach(nil)
+ if childEP.IsAttached() {
+ t.Error("After detach, childEP.IsAttached() = true, want = false")
+ }
+ if nestedEP.IsAttached() {
+ t.Error("After detach, nestedEP.IsAttached() = true, want = false")
+ }
+
+ disp.count = 0
+ nestedEP.DeliverNetworkPacket(emptyAddress, emptyAddress, header.IPv4ProtocolNumber, &stack.PacketBuffer{})
+ if disp.count != 0 {
+ t.Errorf("After second packet with dispatcher detached, got disp.count = %d, want = 0", disp.count)
+ }
+
+}
diff --git a/pkg/tcpip/link/sniffer/BUILD b/pkg/tcpip/link/sniffer/BUILD
index 230a8d53a..7cbc305e7 100644
--- a/pkg/tcpip/link/sniffer/BUILD
+++ b/pkg/tcpip/link/sniffer/BUILD
@@ -14,6 +14,7 @@ go_library(
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/header",
+ "//pkg/tcpip/link/nested",
"//pkg/tcpip/stack",
],
)
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index f2e47b6a7..d9cd4e83a 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -31,6 +31,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/nested"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -48,18 +49,21 @@ var LogPackets uint32 = 1
var LogPacketsToPCAP uint32 = 1
type endpoint struct {
- dispatcher stack.NetworkDispatcher
- lower stack.LinkEndpoint
+ nested.Endpoint
writer io.Writer
maxPCAPLen uint32
}
+var _ stack.GSOEndpoint = (*endpoint)(nil)
+var _ stack.LinkEndpoint = (*endpoint)(nil)
+var _ stack.NetworkDispatcher = (*endpoint)(nil)
+
// New creates a new sniffer link-layer endpoint. It wraps around another
// endpoint and logs packets as they traverse the endpoint.
func New(lower stack.LinkEndpoint) stack.LinkEndpoint {
- return &endpoint{
- lower: lower,
- }
+ sniffer := &endpoint{}
+ sniffer.Endpoint.Init(lower, sniffer)
+ return sniffer
}
func zoneOffset() (int32, error) {
@@ -103,11 +107,12 @@ func NewWithWriter(lower stack.LinkEndpoint, writer io.Writer, snapLen uint32) (
if err := writePCAPHeader(writer, snapLen); err != nil {
return nil, err
}
- return &endpoint{
- lower: lower,
+ sniffer := &endpoint{
writer: writer,
maxPCAPLen: snapLen,
- }, nil
+ }
+ sniffer.Endpoint.Init(lower, sniffer)
+ return sniffer, nil
}
// DeliverNetworkPacket implements the stack.NetworkDispatcher interface. It is
@@ -115,50 +120,7 @@ func NewWithWriter(lower stack.LinkEndpoint, writer io.Writer, snapLen uint32) (
// logs the packet before forwarding to the actual dispatcher.
func (e *endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
e.dumpPacket("recv", nil, protocol, pkt)
- e.dispatcher.DeliverNetworkPacket(remote, local, protocol, pkt)
-}
-
-// Attach implements the stack.LinkEndpoint interface. It saves the dispatcher
-// and registers with the lower endpoint as its dispatcher so that "e" is called
-// for inbound packets.
-func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) {
- e.dispatcher = dispatcher
- e.lower.Attach(e)
-}
-
-// IsAttached implements stack.LinkEndpoint.IsAttached.
-func (e *endpoint) IsAttached() bool {
- return e.dispatcher != nil
-}
-
-// MTU implements stack.LinkEndpoint.MTU. It just forwards the request to the
-// lower endpoint.
-func (e *endpoint) MTU() uint32 {
- return e.lower.MTU()
-}
-
-// Capabilities implements stack.LinkEndpoint.Capabilities. It just forwards the
-// request to the lower endpoint.
-func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities {
- return e.lower.Capabilities()
-}
-
-// MaxHeaderLength implements the stack.LinkEndpoint interface. It just forwards
-// the request to the lower endpoint.
-func (e *endpoint) MaxHeaderLength() uint16 {
- return e.lower.MaxHeaderLength()
-}
-
-func (e *endpoint) LinkAddress() tcpip.LinkAddress {
- return e.lower.LinkAddress()
-}
-
-// GSOMaxSize returns the maximum GSO packet size.
-func (e *endpoint) GSOMaxSize() uint32 {
- if gso, ok := e.lower.(stack.GSOEndpoint); ok {
- return gso.GSOMaxSize()
- }
- return 0
+ e.Endpoint.DeliverNetworkPacket(remote, local, protocol, pkt)
}
func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
@@ -203,7 +165,7 @@ func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.Netw
// forwards the request to the lower endpoint.
func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
e.dumpPacket("send", gso, protocol, pkt)
- return e.lower.WritePacket(r, gso, protocol, pkt)
+ return e.Endpoint.WritePacket(r, gso, protocol, pkt)
}
// WritePackets implements the stack.LinkEndpoint interface. It is called by
@@ -213,7 +175,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
e.dumpPacket("send", gso, protocol, pkt)
}
- return e.lower.WritePackets(r, gso, pkts, protocol)
+ return e.Endpoint.WritePackets(r, gso, pkts, protocol)
}
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
@@ -221,12 +183,9 @@ func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
e.dumpPacket("send", nil, 0, &stack.PacketBuffer{
Data: vv,
})
- return e.lower.WriteRawPacket(vv)
+ return e.Endpoint.WriteRawPacket(vv)
}
-// Wait implements stack.LinkEndpoint.Wait.
-func (e *endpoint) Wait() { e.lower.Wait() }
-
func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer, gso *stack.GSO) {
// Figure out the network layer info.
var transProto uint8
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 4e9b404c8..62d4eb1b6 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -170,17 +170,10 @@ func (it *IPTables) GetTable(name string) (Table, bool) {
func (it *IPTables) ReplaceTable(name string, table Table) {
it.mu.Lock()
defer it.mu.Unlock()
+ it.modified = true
it.tables[name] = table
}
-// ModifyTables acquires write-lock and calls fn with internal name-to-table
-// map. This function can be used to update multiple tables atomically.
-func (it *IPTables) ModifyTables(fn func(map[string]Table)) {
- it.mu.Lock()
- defer it.mu.Unlock()
- fn(it.tables)
-}
-
// GetPriorities returns slice of priorities associated with hook.
func (it *IPTables) GetPriorities(hook Hook) []string {
it.mu.RLock()
@@ -209,6 +202,15 @@ const (
//
// Precondition: pkt.NetworkHeader is set.
func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, address tcpip.Address, nicName string) bool {
+ // Many users never configure iptables. Spare them the cost of rule
+ // traversal if rules have never been set.
+ it.mu.RLock()
+ if !it.modified {
+ it.mu.RUnlock()
+ return true
+ }
+ it.mu.RUnlock()
+
	// Packets are manipulated only if a connection and a matching
	// NAT rule exist.
it.connections.HandlePacket(pkt, hook, gso, r)
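
The fast path added above relies on ReplaceTable being the only writer that flips modified under the write lock, while Check takes the read lock just long enough to test the flag. Stripped of netstack types, the pattern is a dirty flag guarding an expensive traversal; tableSet and its fields below are stand-ins, not part of this change.

package main

import (
	"fmt"
	"sync"
)

// tableSet is a stripped-down stand-in for IPTables: rule tables plus a
// "modified" flag that lets readers skip work when nothing was ever installed.
type tableSet struct {
	mu       sync.RWMutex
	modified bool
	tables   map[string][]string
}

// replace installs a table and marks the set as modified (the write path).
func (ts *tableSet) replace(name string, rules []string) {
	ts.mu.Lock()
	defer ts.mu.Unlock()
	ts.modified = true
	if ts.tables == nil {
		ts.tables = make(map[string][]string)
	}
	ts.tables[name] = rules
}

// check is the hot path: if no table was ever replaced, accept the packet
// without touching the rule data at all.
func (ts *tableSet) check() bool {
	ts.mu.RLock()
	modified := ts.modified
	ts.mu.RUnlock()
	if !modified {
		return true // accept; rules were never configured
	}
	// ... full rule traversal would go here ...
	return true
}

func main() {
	var ts tableSet
	fmt.Println(ts.check()) // true via the fast path, no rules installed
	ts.replace("filter", []string{"ACCEPT"})
	fmt.Println(ts.check()) // true, but now via the traversal path
}
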
diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go
index 4a6a5c6f1..7026990c4 100644
--- a/pkg/tcpip/stack/iptables_types.go
+++ b/pkg/tcpip/stack/iptables_types.go
@@ -79,11 +79,11 @@ const (
// IPTables holds all the tables for a netstack.
type IPTables struct {
- // mu protects tables and priorities.
+ // mu protects tables, priorities, and modified.
mu sync.RWMutex
- // tables maps table names to tables. User tables have arbitrary names. mu
- // needs to be locked for accessing.
+ // tables maps table names to tables. User tables have arbitrary names.
+ // mu needs to be locked for accessing.
tables map[string]Table
// priorities maps each hook to a list of table names. The order of the
@@ -91,11 +91,16 @@ type IPTables struct {
// hook. mu needs to be locked for accessing.
priorities map[Hook][]string
+ // modified is whether tables have been modified at least once. It is
+ // used to elide the iptables performance overhead for workloads that
+ // don't utilize iptables.
+ modified bool
+
connections ConnTrackTable
}
// A Table defines a set of chains and hooks into the network stack. It is
-// really just a list of rules with some metadata for entrypoints and such.
+// really just a list of rules.
type Table struct {
// Rules holds the rules that make up the table.
Rules []Rule
@@ -110,10 +115,6 @@ type Table struct {
	// UserChains holds user-defined chains, keyed by name. Users
// can give their chains arbitrary names.
UserChains map[string]int
-
- // Metadata holds information about the Table that is useful to users
- // of IPTables, but not to the netstack IPTables code itself.
- metadata interface{}
}
// ValidHooks returns a bitmap of the builtin hooks for the given table.
@@ -125,16 +126,6 @@ func (table *Table) ValidHooks() uint32 {
return hooks
}
-// Metadata returns the metadata object stored in table.
-func (table *Table) Metadata() interface{} {
- return table.metadata
-}
-
-// SetMetadata sets the metadata object stored in table.
-func (table *Table) SetMetadata(metadata interface{}) {
- table.metadata = metadata
-}
-
// A Rule is a packet processing rule. It consists of two pieces. First it
// contains zero or more matchers, each of which is a specification of which
// packets this rule applies to. If there are no matchers in the rule, it
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index ae326b3ab..6f86abc98 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -36,15 +36,24 @@ import (
)
const (
- addr1 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01")
- addr2 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02")
- addr3 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03")
- linkAddr1 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06")
- linkAddr2 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x07")
- linkAddr3 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x08")
- linkAddr4 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x09")
- defaultTimeout = 100 * time.Millisecond
- defaultAsyncEventTimeout = time.Second
+ addr1 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01")
+ addr2 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02")
+ addr3 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03")
+ linkAddr1 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06")
+ linkAddr2 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x07")
+ linkAddr3 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x08")
+ linkAddr4 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x09")
+
+ // Extra time to use when waiting for an async event to occur.
+ defaultAsyncPositiveEventTimeout = 10 * time.Second
+
+ // Extra time to use when waiting for an async event to not occur.
+ //
+	// Since a negative check is used to make sure an event did not happen, it is
+	// okay to use a smaller timeout than in the positive case: an execution stall
+	// relative to the monotonic clock will not affect the expected outcome.
+ defaultAsyncNegativeEventTimeout = time.Second
)
var (
@@ -442,7 +451,7 @@ func TestDADResolve(t *testing.T) {
// Make sure the address does not resolve before the resolution time has
// passed.
- time.Sleep(test.expectedRetransmitTimer*time.Duration(test.dupAddrDetectTransmits) - defaultAsyncEventTimeout)
+ time.Sleep(test.expectedRetransmitTimer*time.Duration(test.dupAddrDetectTransmits) - defaultAsyncNegativeEventTimeout)
if addr, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil {
t.Errorf("got stack.GetMainNICAddress(%d, %d) = (_, %s), want = (_, nil)", nicID, header.IPv6ProtocolNumber, err)
} else if want := (tcpip.AddressWithPrefix{}); addr != want {
@@ -471,7 +480,7 @@ func TestDADResolve(t *testing.T) {
// Wait for DAD to resolve.
select {
- case <-time.After(2 * defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for DAD resolution")
case e := <-ndpDisp.dadC:
if diff := checkDADEvent(e, nicID, addr1, true, nil); diff != "" {
@@ -1169,7 +1178,7 @@ func TestRouterDiscoveryDispatcherNoRemember(t *testing.T) {
select {
case <-ndpDisp.routerC:
t.Fatal("should not have received any router events")
- case <-time.After(lifetimeSeconds*time.Second + defaultTimeout):
+ case <-time.After(lifetimeSeconds*time.Second + defaultAsyncNegativeEventTimeout):
}
}
@@ -1252,7 +1261,7 @@ func TestRouterDiscovery(t *testing.T) {
// Wait for the normal lifetime plus an extra bit for the
// router to get invalidated. If we don't get an invalidation
// event after this time, then something is wrong.
- expectAsyncRouterInvalidationEvent(llAddr2, l2LifetimeSeconds*time.Second+defaultAsyncEventTimeout)
+ expectAsyncRouterInvalidationEvent(llAddr2, l2LifetimeSeconds*time.Second+defaultAsyncPositiveEventTimeout)
// Rx an RA from lladdr2 with huge lifetime.
e.InjectInbound(header.IPv6ProtocolNumber, raBuf(llAddr2, 1000))
@@ -1269,7 +1278,7 @@ func TestRouterDiscovery(t *testing.T) {
// Wait for the normal lifetime plus an extra bit for the
// router to get invalidated. If we don't get an invalidation
// event after this time, then something is wrong.
- expectAsyncRouterInvalidationEvent(llAddr3, l3LifetimeSeconds*time.Second+defaultAsyncEventTimeout)
+ expectAsyncRouterInvalidationEvent(llAddr3, l3LifetimeSeconds*time.Second+defaultAsyncPositiveEventTimeout)
}
// TestRouterDiscoveryMaxRouters tests that only
@@ -1418,7 +1427,7 @@ func TestPrefixDiscoveryDispatcherNoRemember(t *testing.T) {
select {
case <-ndpDisp.prefixC:
t.Fatal("should not have received any prefix events")
- case <-time.After(lifetimeSeconds*time.Second + defaultTimeout):
+ case <-time.After(lifetimeSeconds*time.Second + defaultAsyncNegativeEventTimeout):
}
}
@@ -1500,7 +1509,7 @@ func TestPrefixDiscovery(t *testing.T) {
if diff := checkPrefixEvent(e, subnet2, false); diff != "" {
t.Errorf("prefix event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(time.Duration(lifetime)*time.Second + defaultAsyncEventTimeout):
+ case <-time.After(time.Duration(lifetime)*time.Second + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for prefix discovery event")
}
@@ -1565,7 +1574,7 @@ func TestPrefixDiscoveryWithInfiniteLifetime(t *testing.T) {
select {
case <-ndpDisp.prefixC:
t.Fatal("unexpectedly invalidated a prefix with infinite lifetime")
- case <-time.After(testInfiniteLifetime + defaultTimeout):
+ case <-time.After(testInfiniteLifetime + defaultAsyncNegativeEventTimeout):
}
// Receive an RA with finite lifetime.
@@ -1590,7 +1599,7 @@ func TestPrefixDiscoveryWithInfiniteLifetime(t *testing.T) {
select {
case <-ndpDisp.prefixC:
t.Fatal("unexpectedly invalidated a prefix with infinite lifetime")
- case <-time.After(testInfiniteLifetime + defaultTimeout):
+ case <-time.After(testInfiniteLifetime + defaultAsyncNegativeEventTimeout):
}
// Receive an RA with a prefix with a lifetime value greater than the
@@ -1599,7 +1608,7 @@ func TestPrefixDiscoveryWithInfiniteLifetime(t *testing.T) {
select {
case <-ndpDisp.prefixC:
t.Fatal("unexpectedly invalidated a prefix with infinite lifetime")
- case <-time.After((testInfiniteLifetimeSeconds+1)*time.Second + defaultTimeout):
+ case <-time.After((testInfiniteLifetimeSeconds+1)*time.Second + defaultAsyncNegativeEventTimeout):
}
// Receive an RA with 0 lifetime.
@@ -1835,7 +1844,7 @@ func TestAutoGenAddr(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, addr1, invalidatedAddr); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(newMinVLDuration + defaultAsyncEventTimeout):
+ case <-time.After(newMinVLDuration + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
if containsV6Addr(s.NICInfo()[1].ProtocolAddresses, addr1) {
@@ -1962,7 +1971,7 @@ func TestAutoGenTempAddr(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
}
@@ -1975,7 +1984,7 @@ func TestAutoGenTempAddr(t *testing.T) {
if diff := checkDADEvent(e, nicID, addr, true, nil); diff != "" {
t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(time.Duration(test.dupAddrTransmits)*test.retransmitTimer + defaultAsyncEventTimeout):
+ case <-time.After(time.Duration(test.dupAddrTransmits)*test.retransmitTimer + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for DAD event")
}
}
@@ -2081,10 +2090,10 @@ func TestAutoGenTempAddr(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, nextAddr, invalidatedAddr); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(defaultTimeout):
+ case <-time.After(defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
- case <-time.After(newMinVLDuration + defaultTimeout):
+ case <-time.After(newMinVLDuration + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
if mismatch := addressCheck(s.NICInfo()[nicID].ProtocolAddresses, []tcpip.AddressWithPrefix{addr2, tempAddr2}, []tcpip.AddressWithPrefix{addr1, tempAddr1}); mismatch != "" {
@@ -2180,7 +2189,7 @@ func TestNoAutoGenTempAddrForLinkLocal(t *testing.T) {
if diff := checkDADEvent(e, nicID, llAddr1, true, nil); diff != "" {
t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(time.Duration(test.dupAddrTransmits)*test.retransmitTimer + defaultAsyncEventTimeout):
+ case <-time.After(time.Duration(test.dupAddrTransmits)*test.retransmitTimer + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for DAD event")
}
@@ -2188,7 +2197,7 @@ func TestNoAutoGenTempAddrForLinkLocal(t *testing.T) {
select {
case e := <-ndpDisp.autoGenAddrC:
t.Errorf("got unxpected auto gen addr event = %+v", e)
- case <-time.After(defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncNegativeEventTimeout):
}
})
}
@@ -2265,7 +2274,7 @@ func TestNoAutoGenTempAddrWithoutStableAddr(t *testing.T) {
if diff := checkDADEvent(e, nicID, addr.Address, true, nil); diff != "" {
t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(dadTransmits*retransmitTimer + defaultAsyncEventTimeout):
+ case <-time.After(dadTransmits*retransmitTimer + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for DAD event")
}
select {
@@ -2273,7 +2282,7 @@ func TestNoAutoGenTempAddrWithoutStableAddr(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, tempAddr, newAddr); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
}
@@ -2363,13 +2372,13 @@ func TestAutoGenTempAddrRegen(t *testing.T) {
}
// Wait for regeneration
- expectAutoGenAddrEventAsync(tempAddr2, newAddr, regenAfter+defaultAsyncEventTimeout)
+ expectAutoGenAddrEventAsync(tempAddr2, newAddr, regenAfter+defaultAsyncPositiveEventTimeout)
if mismatch := addressCheck(s.NICInfo()[nicID].ProtocolAddresses, []tcpip.AddressWithPrefix{addr, tempAddr1, tempAddr2}, nil); mismatch != "" {
t.Fatal(mismatch)
}
// Wait for regeneration
- expectAutoGenAddrEventAsync(tempAddr3, newAddr, regenAfter+defaultAsyncEventTimeout)
+ expectAutoGenAddrEventAsync(tempAddr3, newAddr, regenAfter+defaultAsyncPositiveEventTimeout)
if mismatch := addressCheck(s.NICInfo()[nicID].ProtocolAddresses, []tcpip.AddressWithPrefix{addr, tempAddr1, tempAddr2, tempAddr3}, nil); mismatch != "" {
t.Fatal(mismatch)
}
@@ -2398,7 +2407,7 @@ func TestAutoGenTempAddrRegen(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, addr, invalidatedAddr); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
} else if diff := checkAutoGenAddrEvent(e, addr, invalidatedAddr); diff == "" {
@@ -2407,12 +2416,12 @@ func TestAutoGenTempAddrRegen(t *testing.T) {
select {
case e := <-ndpDisp.autoGenAddrC:
t.Fatalf("unexpectedly got an auto-generated event = %+v", e)
- case <-time.After(defaultTimeout):
+ case <-time.After(defaultAsyncNegativeEventTimeout):
}
} else {
t.Fatalf("got unexpected auto-generated event = %+v", e)
}
- case <-time.After(invalidateAfter + defaultAsyncEventTimeout):
+ case <-time.After(invalidateAfter + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
@@ -2517,7 +2526,7 @@ func TestAutoGenTempAddrRegenTimerUpdates(t *testing.T) {
select {
case e := <-ndpDisp.autoGenAddrC:
t.Fatalf("unexpected auto gen addr event = %+v", e)
- case <-time.After(regenAfter + defaultAsyncEventTimeout):
+ case <-time.After(regenAfter + defaultAsyncNegativeEventTimeout):
}
// Prefer the prefix again.
@@ -2546,7 +2555,7 @@ func TestAutoGenTempAddrRegenTimerUpdates(t *testing.T) {
select {
case e := <-ndpDisp.autoGenAddrC:
t.Fatalf("unexpected auto gen addr event = %+v", e)
- case <-time.After(regenAfter + defaultAsyncEventTimeout):
+ case <-time.After(regenAfter + defaultAsyncNegativeEventTimeout):
}
// Set the maximum lifetimes for temporary addresses such that on the next
@@ -2556,14 +2565,14 @@ func TestAutoGenTempAddrRegenTimerUpdates(t *testing.T) {
// addresses + the time that has already passed since the last address was
// generated so that the regeneration timer is needed to generate the next
// address.
- newLifetimes := newMinVLDuration + regenAfter + defaultAsyncEventTimeout
+ newLifetimes := newMinVLDuration + regenAfter + defaultAsyncNegativeEventTimeout
ndpConfigs.MaxTempAddrValidLifetime = newLifetimes
ndpConfigs.MaxTempAddrPreferredLifetime = newLifetimes
if err := s.SetNDPConfigurations(nicID, ndpConfigs); err != nil {
t.Fatalf("s.SetNDPConfigurations(%d, _): %s", nicID, err)
}
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix, true, true, 100, 100))
- expectAutoGenAddrEventAsync(tempAddr3, newAddr, regenAfter+defaultAsyncEventTimeout)
+ expectAutoGenAddrEventAsync(tempAddr3, newAddr, regenAfter+defaultAsyncPositiveEventTimeout)
}
// TestMixedSLAACAddrConflictRegen tests SLAAC address regeneration in response
@@ -2711,7 +2720,7 @@ func TestMixedSLAACAddrConflictRegen(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
}
@@ -2724,7 +2733,7 @@ func TestMixedSLAACAddrConflictRegen(t *testing.T) {
if diff := checkDADEvent(e, nicID, addr, true, nil); diff != "" {
t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(dupAddrTransmits*retransmitTimer + defaultAsyncEventTimeout):
+ case <-time.After(dupAddrTransmits*retransmitTimer + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for DAD event")
}
}
@@ -3070,7 +3079,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) {
expectPrimaryAddr(addr1)
// Wait for addr of prefix1 to be deprecated.
- expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncEventTimeout)
+ expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncPositiveEventTimeout)
if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
t.Fatalf("should not have %s in the list of addresses", addr1)
}
@@ -3110,7 +3119,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) {
expectPrimaryAddr(addr1)
// Wait for addr of prefix1 to be deprecated.
- expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncEventTimeout)
+ expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncPositiveEventTimeout)
if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
t.Fatalf("should not have %s in the list of addresses", addr1)
}
@@ -3124,7 +3133,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) {
}
// Wait for addr of prefix1 to be invalidated.
- expectAutoGenAddrEventAfter(addr1, invalidatedAddr, time.Second+defaultAsyncEventTimeout)
+ expectAutoGenAddrEventAfter(addr1, invalidatedAddr, time.Second+defaultAsyncPositiveEventTimeout)
if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
t.Fatalf("should not have %s in the list of addresses", addr1)
}
@@ -3156,7 +3165,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
} else if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff == "" {
@@ -3165,12 +3174,12 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) {
select {
case <-ndpDisp.autoGenAddrC:
t.Fatal("unexpectedly got an auto-generated event")
- case <-time.After(defaultTimeout):
+ case <-time.After(defaultAsyncNegativeEventTimeout):
}
} else {
t.Fatalf("got unexpected auto-generated event")
}
- case <-time.After(newMinVLDuration + defaultAsyncEventTimeout):
+ case <-time.After(newMinVLDuration + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
@@ -3295,7 +3304,7 @@ func TestAutoGenAddrFiniteToInfiniteToFiniteVL(t *testing.T) {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(minVLSeconds*time.Second + defaultAsyncEventTimeout):
+ case <-time.After(minVLSeconds*time.Second + defaultAsyncPositiveEventTimeout):
t.Fatal("timeout waiting for addr auto gen event")
}
})
@@ -3439,7 +3448,7 @@ func TestAutoGenAddrValidLifetimeUpdates(t *testing.T) {
select {
case <-ndpDisp.autoGenAddrC:
t.Fatal("unexpectedly received an auto gen addr event")
- case <-time.After(time.Duration(test.evl)*time.Second - defaultAsyncEventTimeout):
+ case <-time.After(time.Duration(test.evl)*time.Second - defaultAsyncNegativeEventTimeout):
}
// Wait for the invalidation event.
@@ -3448,7 +3457,7 @@ func TestAutoGenAddrValidLifetimeUpdates(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, addr, invalidatedAddr); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(2 * defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncPositiveEventTimeout):
t.Fatal("timeout waiting for addr auto gen event")
}
})
@@ -3509,7 +3518,7 @@ func TestAutoGenAddrRemoval(t *testing.T) {
select {
case <-ndpDisp.autoGenAddrC:
t.Fatal("unexpectedly received an auto gen addr event")
- case <-time.After(lifetimeSeconds*time.Second + defaultTimeout):
+ case <-time.After(lifetimeSeconds*time.Second + defaultAsyncNegativeEventTimeout):
}
}
@@ -3672,7 +3681,7 @@ func TestAutoGenAddrStaticConflict(t *testing.T) {
select {
case <-ndpDisp.autoGenAddrC:
t.Fatal("unexpectedly received an auto gen addr event")
- case <-time.After(lifetimeSeconds*time.Second + defaultTimeout):
+ case <-time.After(lifetimeSeconds*time.Second + defaultAsyncNegativeEventTimeout):
}
if !containsV6Addr(s.NICInfo()[1].ProtocolAddresses, addr) {
t.Fatalf("Should have %s in the list of addresses", addr1)
@@ -3770,7 +3779,7 @@ func TestAutoGenAddrWithOpaqueIID(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, addr1, invalidatedAddr); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(validLifetimeSecondPrefix1*time.Second + defaultAsyncEventTimeout):
+ case <-time.After(validLifetimeSecondPrefix1*time.Second + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
@@ -3837,7 +3846,7 @@ func TestAutoGenAddrInResponseToDADConflicts(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for addr auto gen event")
}
}
@@ -3863,7 +3872,7 @@ func TestAutoGenAddrInResponseToDADConflicts(t *testing.T) {
if diff := checkDADEvent(e, nicID, addr, resolved, nil); diff != "" {
t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(dadTransmits*retransmitTimer + defaultAsyncEventTimeout):
+ case <-time.After(dadTransmits*retransmitTimer + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for DAD event")
}
}
@@ -4030,7 +4039,7 @@ func TestAutoGenAddrInResponseToDADConflicts(t *testing.T) {
select {
case e := <-ndpDisp.autoGenAddrC:
t.Fatalf("unexpectedly got an auto-generated address event = %+v", e)
- case <-time.After(defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncNegativeEventTimeout):
}
})
}
@@ -4149,7 +4158,7 @@ func TestAutoGenAddrWithEUI64IIDNoDADRetries(t *testing.T) {
select {
case e := <-ndpDisp.autoGenAddrC:
t.Fatalf("unexpectedly got an auto-generated address event = %+v", e)
- case <-time.After(defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncNegativeEventTimeout):
}
})
}
@@ -4251,7 +4260,7 @@ func TestAutoGenAddrContinuesLifetimesAfterRetry(t *testing.T) {
if diff := checkDADEvent(e, nicID, addr.Address, true, nil); diff != "" {
t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(dadTransmits*retransmitTimer + defaultAsyncEventTimeout):
+ case <-time.After(dadTransmits*retransmitTimer + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for DAD event")
}
@@ -4277,7 +4286,7 @@ func TestAutoGenAddrContinuesLifetimesAfterRetry(t *testing.T) {
if diff := checkAutoGenAddrEvent(e, addr, invalidatedAddr); diff != "" {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
- case <-time.After(defaultAsyncEventTimeout):
+ case <-time.After(defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for invalidated auto gen addr event after deprecation")
}
} else {
@@ -4285,7 +4294,7 @@ func TestAutoGenAddrContinuesLifetimesAfterRetry(t *testing.T) {
t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
}
}
- case <-time.After(lifetimeSeconds*time.Second - failureTimer - dadTransmits*retransmitTimer + defaultAsyncEventTimeout):
+ case <-time.After(lifetimeSeconds*time.Second - failureTimer - dadTransmits*retransmitTimer + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for auto gen addr event")
}
}
@@ -4869,7 +4878,7 @@ func TestCleanupNDPState(t *testing.T) {
// Should not get any more events (invalidation timers should have been
// cancelled when the NDP state was cleaned up).
- time.Sleep(lifetimeSeconds*time.Second + defaultTimeout)
+ time.Sleep(lifetimeSeconds*time.Second + defaultAsyncNegativeEventTimeout)
select {
case <-ndpDisp.routerC:
t.Error("unexpected router event")
@@ -5172,24 +5181,24 @@ func TestRouterSolicitation(t *testing.T) {
// Make sure each RS is sent at the right time.
remaining := test.maxRtrSolicit
if remaining > 0 {
- waitForPkt(test.effectiveMaxRtrSolicitDelay + defaultAsyncEventTimeout)
+ waitForPkt(test.effectiveMaxRtrSolicitDelay + defaultAsyncPositiveEventTimeout)
remaining--
}
for ; remaining > 0; remaining-- {
- if test.effectiveRtrSolicitInt > defaultAsyncEventTimeout {
- waitForNothing(test.effectiveRtrSolicitInt - defaultAsyncEventTimeout)
- waitForPkt(2 * defaultAsyncEventTimeout)
+ if test.effectiveRtrSolicitInt > defaultAsyncPositiveEventTimeout {
+ waitForNothing(test.effectiveRtrSolicitInt - defaultAsyncNegativeEventTimeout)
+ waitForPkt(defaultAsyncPositiveEventTimeout)
} else {
- waitForPkt(test.effectiveRtrSolicitInt * defaultAsyncEventTimeout)
+ waitForPkt(test.effectiveRtrSolicitInt + defaultAsyncPositiveEventTimeout)
}
}
// Make sure no more RS.
if test.effectiveRtrSolicitInt > test.effectiveMaxRtrSolicitDelay {
- waitForNothing(test.effectiveRtrSolicitInt + defaultAsyncEventTimeout)
+ waitForNothing(test.effectiveRtrSolicitInt + defaultAsyncNegativeEventTimeout)
} else {
- waitForNothing(test.effectiveMaxRtrSolicitDelay + defaultAsyncEventTimeout)
+ waitForNothing(test.effectiveMaxRtrSolicitDelay + defaultAsyncNegativeEventTimeout)
}
// Make sure the counter got properly
@@ -5305,11 +5314,11 @@ func TestStopStartSolicitingRouters(t *testing.T) {
// Stop soliciting routers.
test.stopFn(t, s, true /* first */)
- ctx, cancel := context.WithTimeout(context.Background(), delay+defaultAsyncEventTimeout)
+ ctx, cancel := context.WithTimeout(context.Background(), delay+defaultAsyncNegativeEventTimeout)
defer cancel()
if _, ok := e.ReadContext(ctx); ok {
// A single RS may have been sent before solicitations were stopped.
- ctx, cancel := context.WithTimeout(context.Background(), interval+defaultAsyncEventTimeout)
+ ctx, cancel := context.WithTimeout(context.Background(), interval+defaultAsyncNegativeEventTimeout)
defer cancel()
if _, ok = e.ReadContext(ctx); ok {
t.Fatal("should not have sent more than one RS message")
@@ -5319,7 +5328,7 @@ func TestStopStartSolicitingRouters(t *testing.T) {
// Stopping router solicitations after it has already been stopped should
// do nothing.
test.stopFn(t, s, false /* first */)
- ctx, cancel = context.WithTimeout(context.Background(), delay+defaultAsyncEventTimeout)
+ ctx, cancel = context.WithTimeout(context.Background(), delay+defaultAsyncNegativeEventTimeout)
defer cancel()
if _, ok := e.ReadContext(ctx); ok {
t.Fatal("unexpectedly got a packet after router solicitation has been stopepd")
@@ -5332,10 +5341,10 @@ func TestStopStartSolicitingRouters(t *testing.T) {
// Start soliciting routers.
test.startFn(t, s)
- waitForPkt(delay + defaultAsyncEventTimeout)
- waitForPkt(interval + defaultAsyncEventTimeout)
- waitForPkt(interval + defaultAsyncEventTimeout)
- ctx, cancel = context.WithTimeout(context.Background(), interval+defaultAsyncEventTimeout)
+ waitForPkt(delay + defaultAsyncPositiveEventTimeout)
+ waitForPkt(interval + defaultAsyncPositiveEventTimeout)
+ waitForPkt(interval + defaultAsyncPositiveEventTimeout)
+ ctx, cancel = context.WithTimeout(context.Background(), interval+defaultAsyncNegativeEventTimeout)
defer cancel()
if _, ok := e.ReadContext(ctx); ok {
t.Fatal("unexpectedly got an extra packet after sending out the expected RSs")
@@ -5344,7 +5353,7 @@ func TestStopStartSolicitingRouters(t *testing.T) {
// Starting router solicitations after it has already completed should do
// nothing.
test.startFn(t, s)
- ctx, cancel = context.WithTimeout(context.Background(), delay+defaultAsyncEventTimeout)
+ ctx, cancel = context.WithTimeout(context.Background(), delay+defaultAsyncNegativeEventTimeout)
defer cancel()
if _, ok := e.ReadContext(ctx); ok {
t.Fatal("unexpectedly got a packet after finishing router solicitations")
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index a2190341c..51abe32a7 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -1033,14 +1033,14 @@ func (s *Stack) removeNICLocked(id tcpip.NICID) *tcpip.Error {
// Remove routes in-place. n tracks the number of routes written.
n := 0
for i, r := range s.routeTable {
+ s.routeTable[i] = tcpip.Route{}
if r.NIC != id {
// Keep this route.
- if i > n {
- s.routeTable[n] = r
- }
+ s.routeTable[n] = r
n++
}
}
+
s.routeTable = s.routeTable[:n]
return nic.remove()
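
The route-table change above keeps the in-place compaction but now clears every slot before reslicing, so Route values for the removed NIC are not retained by the slice's backing array. The same idiom in isolation, with illustrative names:

package main

import "fmt"

type route struct{ nic int }

// removeRoutesForNIC keeps routes not bound to id, writing survivors to the
// front of the slice and zeroing every slot before reslicing, so the backing
// array does not keep references to dropped entries.
func removeRoutesForNIC(routes []route, id int) []route {
	n := 0
	for i, r := range routes {
		routes[i] = route{} // clear the slot unconditionally
		if r.nic != id {
			routes[n] = r
			n++
		}
	}
	return routes[:n]
}

func main() {
	rt := []route{{1}, {2}, {1}, {3}}
	fmt.Println(removeRoutesForNIC(rt, 1)) // [{2} {3}]
}
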
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index ffef9bc2c..5aacbf53e 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -3305,7 +3305,7 @@ func TestDoDADWhenNICEnabled(t *testing.T) {
// Wait for DAD to resolve.
select {
- case <-time.After(dadTransmits*retransmitTimer + defaultAsyncEventTimeout):
+ case <-time.After(dadTransmits*retransmitTimer + defaultAsyncPositiveEventTimeout):
t.Fatal("timed out waiting for DAD resolution")
case e := <-ndpDisp.dadC:
if diff := checkDADEvent(e, nicID, addr.AddressWithPrefix.Address, true, nil); diff != "" {
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index b7b227328..956232a44 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -813,6 +813,32 @@ type OutOfBandInlineOption int
// a default TTL.
type DefaultTTLOption uint8
+// StackSACKEnabled is used by stack.(*Stack).TransportProtocolOption to
+// enable/disable SACK support in TCP. See: https://tools.ietf.org/html/rfc2018.
+type StackSACKEnabled bool
+
+// StackDelayEnabled is used by stack.(*Stack).TransportProtocolOption to
+// enable/disable Nagle's algorithm in TCP.
+type StackDelayEnabled bool
+
+// StackSendBufferSizeOption is used by stack.(*Stack).TransportProtocolOption
+// to get/set the default, min and max send buffer sizes.
+type StackSendBufferSizeOption struct {
+ Min int
+ Default int
+ Max int
+}
+
+// StackReceiveBufferSizeOption is used by
+// stack.(*Stack).TransportProtocolOption to get/set the default, min and max
+// receive buffer sizes.
+type StackReceiveBufferSizeOption struct {
+ Min int
+ Default int
+ Max int
+}
+
+//
// IPPacketInfo is the message structure for IP_PKTINFO.
//
// +stateify savable
@@ -1198,6 +1224,9 @@ type UDPStats struct {
// PacketSendErrors is the number of datagrams failed to be sent.
PacketSendErrors *StatCounter
+
+ // ChecksumErrors is the number of datagrams dropped due to bad checksums.
+ ChecksumErrors *StatCounter
}
// Stats holds statistics about the networking stack.
@@ -1241,6 +1270,9 @@ type ReceiveErrors struct {
// ClosedReceiver is the number of received packets dropped because
// of receiving endpoint state being closed.
ClosedReceiver StatCounter
+
+ // ChecksumErrors is the number of packets dropped due to bad checksums.
+ ChecksumErrors StatCounter
}
// SendErrors collects packet send errors within the transport layer for
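
These Stack-prefixed options are read and written through stack.(*Stack).TransportProtocolOption and SetTransportProtocolOption, which is how the TCP and raw endpoints later in this change pick up their buffer-size defaults. A minimal usage sketch, assuming the usual stack construction with the IPv4 and TCP protocols registered:

package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/tcpip"
	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
	"gvisor.dev/gvisor/pkg/tcpip/stack"
	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
)

func main() {
	s := stack.New(stack.Options{
		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol()},
		TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
	})

	// Enable SACK for TCP via the relocated option type.
	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(true)); err != nil {
		fmt.Println("SetTransportProtocolOption:", err)
	}

	// Read back the TCP send buffer limits.
	var snd tcpip.StackSendBufferSizeOption
	if err := s.TransportProtocolOption(tcp.ProtocolNumber, &snd); err == nil {
		fmt.Println(snd.Min, snd.Default, snd.Max)
	}
}
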
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index a406d815e..6a7977259 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -26,6 +26,8 @@
package raw
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
@@ -66,16 +68,17 @@ type endpoint struct {
// protected by rcvMu.
rcvMu sync.Mutex `state:"nosave"`
rcvList rawPacketList
- rcvBufSizeMax int `state:".(int)"`
rcvBufSize int
+ rcvBufSizeMax int `state:".(int)"`
rcvClosed bool
// The following fields are protected by mu.
- mu sync.RWMutex `state:"nosave"`
- sndBufSize int
- closed bool
- connected bool
- bound bool
+ mu sync.RWMutex `state:"nosave"`
+ sndBufSize int
+ sndBufSizeMax int
+ closed bool
+ connected bool
+ bound bool
// route is the route to a remote network endpoint. It is set via
// Connect(), and is valid only when connected is true.
route stack.Route `state:"manual"`
@@ -103,10 +106,21 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt
},
waiterQueue: waiterQueue,
rcvBufSizeMax: 32 * 1024,
- sndBufSize: 32 * 1024,
+ sndBufSizeMax: 32 * 1024,
associated: associated,
}
+ // Override with stack defaults.
+ var ss tcpip.StackSendBufferSizeOption
+ if err := s.TransportProtocolOption(transProto, &ss); err == nil {
+ e.sndBufSizeMax = ss.Default
+ }
+
+ var rs tcpip.StackReceiveBufferSizeOption
+ if err := s.TransportProtocolOption(transProto, &rs); err == nil {
+ e.rcvBufSizeMax = rs.Default
+ }
+
// Unassociated endpoints are write-only and users call Write() with IP
// headers included. Because they're write-only, we don't need to
// register with the stack.
@@ -523,7 +537,46 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt.
func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
- return tcpip.ErrUnknownProtocolOption
+ switch opt {
+ case tcpip.SendBufferSizeOption:
+ // Make sure the send buffer size is within the min and max
+ // allowed.
+ var ss tcpip.StackSendBufferSizeOption
+ if err := e.stack.TransportProtocolOption(e.TransProto, &ss); err != nil {
+ panic(fmt.Sprintf("s.TransportProtocolOption(%d, %+v) = %s", e.TransProto, ss, err))
+ }
+ if v > ss.Max {
+ v = ss.Max
+ }
+ if v < ss.Min {
+ v = ss.Min
+ }
+ e.mu.Lock()
+ e.sndBufSizeMax = v
+ e.mu.Unlock()
+ return nil
+
+ case tcpip.ReceiveBufferSizeOption:
+ // Make sure the receive buffer size is within the min and max
+ // allowed.
+ var rs tcpip.StackReceiveBufferSizeOption
+ if err := e.stack.TransportProtocolOption(e.TransProto, &rs); err != nil {
+ panic(fmt.Sprintf("s.TransportProtocolOption(%d, %+v) = %s", e.TransProto, rs, err))
+ }
+ if v > rs.Max {
+ v = rs.Max
+ }
+ if v < rs.Min {
+ v = rs.Min
+ }
+ e.rcvMu.Lock()
+ e.rcvBufSizeMax = v
+ e.rcvMu.Unlock()
+ return nil
+
+ default:
+ return tcpip.ErrUnknownProtocolOption
+ }
}
// GetSockOpt implements tcpip.Endpoint.GetSockOpt.
@@ -563,7 +616,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
case tcpip.SendBufferSizeOption:
e.mu.Lock()
- v := e.sndBufSize
+ v := e.sndBufSizeMax
e.mu.Unlock()
return v, nil
@@ -636,7 +689,6 @@ func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
e.rcvList.PushBack(packet)
e.rcvBufSize += packet.data.Size()
-
e.rcvMu.Unlock()
e.stats.PacketsReceived.Increment()
// Notify waiters that there's data to be read.
diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD
index e26f01fae..6baeda8e4 100644
--- a/pkg/tcpip/transport/tcp/BUILD
+++ b/pkg/tcpip/transport/tcp/BUILD
@@ -76,7 +76,7 @@ go_library(
)
go_test(
- name = "tcp_test",
+ name = "tcp_x_test",
size = "medium",
srcs = [
"dual_stack_test.go",
@@ -115,3 +115,11 @@ go_test(
"//pkg/tcpip/seqnum",
],
)
+
+go_test(
+ name = "tcp_test",
+ size = "small",
+ srcs = ["timer_test.go"],
+ library = ":tcp",
+ deps = ["//pkg/sleep"],
+)
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 91ee3b0be..377643b82 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -521,7 +521,7 @@ func (h *handshake) execute() *tcpip.Error {
s.AddWaker(&h.ep.newSegmentWaker, wakerForNewSegment)
defer s.Done()
- var sackEnabled SACKEnabled
+ var sackEnabled tcpip.StackSACKEnabled
if err := h.ep.stack.TransportProtocolOption(ProtocolNumber, &sackEnabled); err != nil {
// If stack returned an error when checking for SACKEnabled
// status then just default to switching off SACK negotiation.
@@ -1516,6 +1516,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
// Main loop. Handle segments until both send and receive ends of the
// connection have completed.
cleanupOnError := func(err *tcpip.Error) {
+ e.stack.Stats().TCP.CurrentConnected.Decrement()
e.workerCleanup = true
if err != nil {
e.resetConnectionLocked(err)
@@ -1568,11 +1569,14 @@ loop:
reuseTW = e.doTimeWait()
}
- // Mark endpoint as closed.
- if e.EndpointState() != StateError {
- e.transitionToStateCloseLocked()
+ // Handle any StateError transition from StateTimeWait.
+ if e.EndpointState() == StateError {
+ cleanupOnError(nil)
+ return nil
}
+ e.transitionToStateCloseLocked()
+
// Lock released below.
epilogue()
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index f225b00e7..10df2bcd5 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -851,12 +851,12 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
maxSynRetries: DefaultSynRetries,
}
- var ss SendBufferSizeOption
+ var ss tcpip.StackSendBufferSizeOption
if err := s.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
e.sndBufSize = ss.Default
}
- var rs ReceiveBufferSizeOption
+ var rs tcpip.StackReceiveBufferSizeOption
if err := s.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
e.rcvBufSize = rs.Default
}
@@ -871,7 +871,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
e.rcvAutoParams.disabled = !bool(mrb)
}
- var de DelayEnabled
+ var de tcpip.StackDelayEnabled
if err := s.TransportProtocolOption(ProtocolNumber, &de); err == nil && de {
e.SetSockOptBool(tcpip.DelayOption, true)
}
@@ -1588,7 +1588,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
case tcpip.ReceiveBufferSizeOption:
// Make sure the receive buffer size is within the min and max
// allowed.
- var rs ReceiveBufferSizeOption
+ var rs tcpip.StackReceiveBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
if v < rs.Min {
v = rs.Min
@@ -1638,7 +1638,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
case tcpip.SendBufferSizeOption:
// Make sure the send buffer size is within the min and max
// allowed.
- var ss SendBufferSizeOption
+ var ss tcpip.StackSendBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
if v < ss.Min {
v = ss.Min
@@ -1678,7 +1678,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
return tcpip.ErrInvalidOptionValue
}
}
- var rs ReceiveBufferSizeOption
+ var rs tcpip.StackReceiveBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
if v < rs.Min/2 {
v = rs.Min / 2
@@ -2609,7 +2609,7 @@ func (e *endpoint) receiveBufferSize() int {
}
func (e *endpoint) maxReceiveBufferSize() int {
- var rs ReceiveBufferSizeOption
+ var rs tcpip.StackReceiveBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil {
// As a fallback return the hardcoded max buffer size.
return MaxBufferSize
@@ -2690,7 +2690,7 @@ func timeStampOffset() uint32 {
// if the SYN options indicate that the SACK option was negotiated and the TCP
// stack is configured to enable TCP SACK option.
func (e *endpoint) maybeEnableSACKPermitted(synOpts *header.TCPSynOptions) {
- var v SACKEnabled
+ var v tcpip.StackSACKEnabled
if err := e.stack.TransportProtocolOption(ProtocolNumber, &v); err != nil {
// Stack doesn't support SACK. So just return.
return
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index cbb779666..0bebec2d1 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -186,7 +186,7 @@ func (e *endpoint) Resume(s *stack.Stack) {
epState := e.origEndpointState
switch epState {
case StateInitial, StateBound, StateListen, StateConnecting, StateEstablished:
- var ss SendBufferSizeOption
+ var ss tcpip.StackSendBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
if e.sndBufSize < ss.Min || e.sndBufSize > ss.Max {
panic(fmt.Sprintf("endpoint.sndBufSize %d is outside the min and max allowed [%d, %d]", e.sndBufSize, ss.Min, ss.Max))
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index 73b8a6782..3cff55afa 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -71,29 +71,6 @@ const (
DefaultSynRetries = 6
)
-// SACKEnabled option can be used to enable SACK support in the TCP
-// protocol. See: https://tools.ietf.org/html/rfc2018.
-type SACKEnabled bool
-
-// DelayEnabled option can be used to enable Nagle's algorithm in the TCP protocol.
-type DelayEnabled bool
-
-// SendBufferSizeOption allows the default, min and max send buffer sizes for
-// TCP endpoints to be queried or configured.
-type SendBufferSizeOption struct {
- Min int
- Default int
- Max int
-}
-
-// ReceiveBufferSizeOption allows the default, min and max receive buffer size
-// for TCP endpoints to be queried or configured.
-type ReceiveBufferSizeOption struct {
- Min int
- Default int
- Max int
-}
-
const (
ccReno = "reno"
ccCubic = "cubic"
@@ -160,8 +137,8 @@ type protocol struct {
mu sync.RWMutex
sackEnabled bool
delayEnabled bool
- sendBufferSize SendBufferSizeOption
- recvBufferSize ReceiveBufferSizeOption
+ sendBufferSize tcpip.StackSendBufferSizeOption
+ recvBufferSize tcpip.StackReceiveBufferSizeOption
congestionControl string
availableCongestionControl []string
moderateReceiveBuffer bool
@@ -272,19 +249,19 @@ func replyWithReset(s *segment, tos, ttl uint8) {
// SetOption implements stack.TransportProtocol.SetOption.
func (p *protocol) SetOption(option interface{}) *tcpip.Error {
switch v := option.(type) {
- case SACKEnabled:
+ case tcpip.StackSACKEnabled:
p.mu.Lock()
p.sackEnabled = bool(v)
p.mu.Unlock()
return nil
- case DelayEnabled:
+ case tcpip.StackDelayEnabled:
p.mu.Lock()
p.delayEnabled = bool(v)
p.mu.Unlock()
return nil
- case SendBufferSizeOption:
+ case tcpip.StackSendBufferSizeOption:
if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
return tcpip.ErrInvalidOptionValue
}
@@ -293,7 +270,7 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
p.mu.Unlock()
return nil
- case ReceiveBufferSizeOption:
+ case tcpip.StackReceiveBufferSizeOption:
if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
return tcpip.ErrInvalidOptionValue
}
@@ -386,25 +363,25 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
// Option implements stack.TransportProtocol.Option.
func (p *protocol) Option(option interface{}) *tcpip.Error {
switch v := option.(type) {
- case *SACKEnabled:
+ case *tcpip.StackSACKEnabled:
p.mu.RLock()
- *v = SACKEnabled(p.sackEnabled)
+ *v = tcpip.StackSACKEnabled(p.sackEnabled)
p.mu.RUnlock()
return nil
- case *DelayEnabled:
+ case *tcpip.StackDelayEnabled:
p.mu.RLock()
- *v = DelayEnabled(p.delayEnabled)
+ *v = tcpip.StackDelayEnabled(p.delayEnabled)
p.mu.RUnlock()
return nil
- case *SendBufferSizeOption:
+ case *tcpip.StackSendBufferSizeOption:
p.mu.RLock()
*v = p.sendBufferSize
p.mu.RUnlock()
return nil
- case *ReceiveBufferSizeOption:
+ case *tcpip.StackReceiveBufferSizeOption:
p.mu.RLock()
*v = p.recvBufferSize
p.mu.RUnlock()
@@ -514,8 +491,16 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
// NewProtocol returns a TCP transport protocol.
func NewProtocol() stack.TransportProtocol {
return &protocol{
- sendBufferSize: SendBufferSizeOption{MinBufferSize, DefaultSendBufferSize, MaxBufferSize},
- recvBufferSize: ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize},
+ sendBufferSize: tcpip.StackSendBufferSizeOption{
+ Min: MinBufferSize,
+ Default: DefaultSendBufferSize,
+ Max: MaxBufferSize,
+ },
+ recvBufferSize: tcpip.StackReceiveBufferSizeOption{
+ Min: MinBufferSize,
+ Default: DefaultReceiveBufferSize,
+ Max: MaxBufferSize,
+ },
congestionControl: ccReno,
availableCongestionControl: []string{ccReno, ccCubic},
tcpLingerTimeout: DefaultTCPLingerTimeout,
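
Note: the per-protocol option types removed above now live in the tcpip package as stack-level options. A minimal sketch of how a caller configures them after this change; the stack construction itself is ordinary boilerplate assumed from elsewhere in the tree, not part of this diff, and the chosen buffer sizes are arbitrary examples:

```go
package main

import (
	"log"

	"gvisor.dev/gvisor/pkg/tcpip"
	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
	"gvisor.dev/gvisor/pkg/tcpip/stack"
	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
)

func main() {
	s := stack.New(stack.Options{
		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol()},
		TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
	})

	// SACK and the buffer-size limits are now set with tcpip.Stack* types
	// instead of the deleted tcp.* option types.
	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(true)); err != nil {
		log.Fatalf("enabling SACK: %s", err)
	}
	sndOpt := tcpip.StackSendBufferSizeOption{Min: 4 << 10, Default: 256 << 10, Max: 4 << 20}
	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, sndOpt); err != nil {
		log.Fatalf("setting send buffer sizes: %s", err)
	}

	// Reading the option back goes through the same stack-level type.
	var got tcpip.StackSendBufferSizeOption
	if err := s.TransportProtocolOption(tcp.ProtocolNumber, &got); err != nil {
		log.Fatalf("reading send buffer sizes: %s", err)
	}
	log.Printf("send buffer: min=%d default=%d max=%d", got.Min, got.Default, got.Max)
}
```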
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index fcc165f17..812e503bc 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -46,8 +46,8 @@ func createConnectedWithSACKAndTS(c *context.Context) *context.RawEndpoint {
func setStackSACKPermitted(t *testing.T, c *context.Context, enable bool) {
t.Helper()
- if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enable)); err != nil {
- t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, SACKEnabled(%v) = %v", enable, err)
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(enable)); err != nil {
+ t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, StackSACKEnabled(%t) = %s", enable, err)
}
}
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index c6ffa7a9d..aca6a7951 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -2967,6 +2967,9 @@ loop:
if got := c.Stack().Stats().TCP.CurrentEstablished.Value(); got != 0 {
t.Errorf("got stats.TCP.CurrentEstablished.Value() = %d, want = 0", got)
}
+ if got := c.Stack().Stats().TCP.CurrentConnected.Value(); got != 0 {
+ t.Errorf("got stats.TCP.CurrentConnected.Value() = %d, want = 0", got)
+ }
}
func TestSendOnResetConnection(t *testing.T) {
@@ -3050,6 +3053,9 @@ func TestMaxRetransmitsTimeout(t *testing.T) {
if got := c.Stack().Stats().TCP.EstablishedTimedout.Value(); got != 1 {
t.Errorf("got c.Stack().Stats().TCP.EstablishedTimedout.Value() = %d, want = 1", got)
}
+ if got := c.Stack().Stats().TCP.CurrentConnected.Value(); got != 0 {
+ t.Errorf("got stats.TCP.CurrentConnected.Value() = %d, want = 0", got)
+ }
}
// TestMaxRTO tests if the retransmit interval caps to MaxRTO.
@@ -3981,7 +3987,10 @@ func TestDefaultBufferSizes(t *testing.T) {
checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize)
// Change the default send buffer size.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize * 2, tcp.DefaultSendBufferSize * 20}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSendBufferSizeOption{
+ Min: 1,
+ Default: tcp.DefaultSendBufferSize * 2,
+ Max: tcp.DefaultSendBufferSize * 20}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
@@ -3995,8 +4004,11 @@ func TestDefaultBufferSizes(t *testing.T) {
checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize)
// Change the default receive buffer size.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize * 3, tcp.DefaultReceiveBufferSize * 30}); err != nil {
- t.Fatalf("SetTransportProtocolOption failed: %s", err)
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{
+ Min: 1,
+ Default: tcp.DefaultReceiveBufferSize * 3,
+ Max: tcp.DefaultReceiveBufferSize * 30}); err != nil {
+ t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
ep.Close()
@@ -4023,11 +4035,11 @@ func TestMinMaxBufferSizes(t *testing.T) {
defer ep.Close()
// Change the min/max values for send/receive
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{200, tcp.DefaultReceiveBufferSize * 2, tcp.DefaultReceiveBufferSize * 20}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 200, Default: tcp.DefaultReceiveBufferSize * 2, Max: tcp.DefaultReceiveBufferSize * 20}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{300, tcp.DefaultSendBufferSize * 3, tcp.DefaultSendBufferSize * 30}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSendBufferSizeOption{Min: 300, Default: tcp.DefaultSendBufferSize * 3, Max: tcp.DefaultSendBufferSize * 30}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
@@ -4754,6 +4766,9 @@ func TestKeepalive(t *testing.T) {
if got := c.Stack().Stats().TCP.CurrentEstablished.Value(); got != 0 {
t.Errorf("got stats.TCP.CurrentEstablished.Value() = %d, want = 0", got)
}
+ if got := c.Stack().Stats().TCP.CurrentConnected.Value(); got != 0 {
+ t.Errorf("got stats.TCP.CurrentConnected.Value() = %d, want = 0", got)
+ }
}
func executeHandshake(t *testing.T, c *context.Context, srcPort uint16, synCookieInUse bool) (irs, iss seqnum.Value) {
@@ -5663,7 +5678,7 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
// the segment queue holding unprocessed packets is limited to 500.
const receiveBufferSize = 80 << 10 // 80KB.
const maxReceiveBufferSize = receiveBufferSize * 10
- if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil {
+ if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 1, Default: receiveBufferSize, Max: maxReceiveBufferSize}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
@@ -5784,7 +5799,7 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
// the segment queue holding unprocessed packets is limited to 300.
const receiveBufferSize = 80 << 10 // 80KB.
const maxReceiveBufferSize = receiveBufferSize * 10
- if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil {
+ if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 1, Default: receiveBufferSize, Max: maxReceiveBufferSize}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
@@ -5926,7 +5941,7 @@ func TestDelayEnabled(t *testing.T) {
checkDelayOption(t, c, false, false) // Delay is disabled by default.
for _, v := range []struct {
- delayEnabled tcp.DelayEnabled
+ delayEnabled tcpip.StackDelayEnabled
wantDelayOption bool
}{
{delayEnabled: false, wantDelayOption: false},
@@ -5941,10 +5956,10 @@ func TestDelayEnabled(t *testing.T) {
}
}
-func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcp.DelayEnabled, wantDelayOption bool) {
+func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcpip.StackDelayEnabled, wantDelayOption bool) {
t.Helper()
- var gotDelayEnabled tcp.DelayEnabled
+ var gotDelayEnabled tcpip.StackDelayEnabled
if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &gotDelayEnabled); err != nil {
t.Fatalf("TransportProtocolOption(tcp, &gotDelayEnabled) failed: %s", err)
}
@@ -6771,6 +6786,9 @@ func TestTCPUserTimeout(t *testing.T) {
if got, want := c.Stack().Stats().TCP.EstablishedTimedout.Value(), origEstablishedTimedout+1; got != want {
t.Errorf("got c.Stack().Stats().TCP.EstablishedTimedout = %d, want = %d", got, want)
}
+ if got := c.Stack().Stats().TCP.CurrentConnected.Value(); got != 0 {
+ t.Errorf("got stats.TCP.CurrentConnected.Value() = %d, want = 0", got)
+ }
}
func TestKeepaliveWithUserTimeout(t *testing.T) {
@@ -6842,6 +6860,9 @@ func TestKeepaliveWithUserTimeout(t *testing.T) {
if got, want := c.Stack().Stats().TCP.EstablishedTimedout.Value(), origEstablishedTimedout+1; got != want {
t.Errorf("got c.Stack().Stats().TCP.EstablishedTimedout = %d, want = %d", got, want)
}
+ if got := c.Stack().Stats().TCP.CurrentConnected.Value(); got != 0 {
+ t.Errorf("got stats.TCP.CurrentConnected.Value() = %d, want = 0", got)
+ }
}
func TestIncreaseWindowOnReceive(t *testing.T) {
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 9721f6caf..9e262c272 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -144,12 +144,12 @@ func New(t *testing.T, mtu uint32) *Context {
})
// Allow minimum send/receive buffer sizes to be 1 during tests.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize, 10 * tcp.DefaultSendBufferSize}); err != nil {
- t.Fatalf("SetTransportProtocolOption failed: %v", err)
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSendBufferSizeOption{Min: 1, Default: tcp.DefaultSendBufferSize, Max: 10 * tcp.DefaultSendBufferSize}); err != nil {
+ t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize, 10 * tcp.DefaultReceiveBufferSize}); err != nil {
- t.Fatalf("SetTransportProtocolOption failed: %v", err)
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 1, Default: tcp.DefaultReceiveBufferSize, Max: 10 * tcp.DefaultReceiveBufferSize}); err != nil {
+ t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
// Increase minimum RTO in tests to avoid test flakes due to early
@@ -1091,7 +1091,7 @@ func (c *Context) PassiveConnectWithOptions(maxPayload, wndScale int, synOptions
// SACKEnabled returns true if the TCP Protocol option SACKEnabled is set to true
// for the Stack in the context.
func (c *Context) SACKEnabled() bool {
- var v tcp.SACKEnabled
+ var v tcpip.StackSACKEnabled
if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &v); err != nil {
// Stack doesn't support SACK. So just return.
return false
diff --git a/pkg/tcpip/transport/tcp/timer.go b/pkg/tcpip/transport/tcp/timer.go
index c70525f27..7981d469b 100644
--- a/pkg/tcpip/transport/tcp/timer.go
+++ b/pkg/tcpip/transport/tcp/timer.go
@@ -85,6 +85,7 @@ func (t *timer) init(w *sleep.Waker) {
// cleanup frees all resources associated with the timer.
func (t *timer) cleanup() {
t.timer.Stop()
+ *t = timer{}
}
// checkExpiration checks if the given timer has actually expired, it should be
diff --git a/pkg/tcpip/transport/tcp/timer_test.go b/pkg/tcpip/transport/tcp/timer_test.go
new file mode 100644
index 000000000..dbd6dff54
--- /dev/null
+++ b/pkg/tcpip/transport/tcp/timer_test.go
@@ -0,0 +1,47 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp
+
+import (
+ "testing"
+ "time"
+
+ "gvisor.dev/gvisor/pkg/sleep"
+)
+
+func TestCleanup(t *testing.T) {
+ const (
+ timerDurationSeconds = 2
+ isAssertedTimeoutSeconds = timerDurationSeconds + 1
+ )
+
+ tmr := timer{}
+ w := sleep.Waker{}
+ tmr.init(&w)
+ tmr.enable(timerDurationSeconds * time.Second)
+ tmr.cleanup()
+
+ if want := (timer{}); tmr != want {
+ t.Errorf("got tmr = %+v, want = %+v", tmr, want)
+ }
+
+ // The waker should not be asserted.
+ for i := 0; i < isAssertedTimeoutSeconds; i++ {
+ time.Sleep(time.Second)
+ if w.IsAsserted() {
+ t.Fatalf("waker asserted unexpectedly")
+ }
+ }
+}
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index df5efbf6a..40d66ef09 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -15,6 +15,8 @@
package udp
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/sleep"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -94,6 +96,7 @@ type endpoint struct {
// The following fields are protected by the mu mutex.
mu sync.RWMutex `state:"nosave"`
sndBufSize int
+ sndBufSizeMax int
state EndpointState
route stack.Route `state:"manual"`
dstPort uint16
@@ -159,7 +162,7 @@ type multicastMembership struct {
}
func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) *endpoint {
- return &endpoint{
+ e := &endpoint{
stack: s,
TransportEndpointInfo: stack.TransportEndpointInfo{
NetProto: netProto,
@@ -181,10 +184,23 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
multicastTTL: 1,
multicastLoop: true,
rcvBufSizeMax: 32 * 1024,
- sndBufSize: 32 * 1024,
+ sndBufSizeMax: 32 * 1024,
state: StateInitial,
uniqueID: s.UniqueID(),
}
+
+ // Override with stack defaults.
+ var ss tcpip.StackSendBufferSizeOption
+ if err := s.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
+ e.sndBufSizeMax = ss.Default
+ }
+
+ var rs tcpip.StackReceiveBufferSizeOption
+ if err := s.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
+ e.rcvBufSizeMax = rs.Default
+ }
+
+ return e
}
// UniqueID implements stack.TransportEndpoint.UniqueID.
@@ -611,8 +627,43 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
e.mu.Unlock()
case tcpip.ReceiveBufferSizeOption:
+ // Make sure the receive buffer size is within the min and max
+ // allowed.
+ var rs tcpip.StackReceiveBufferSizeOption
+ if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil {
+ panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %+v) = %s", ProtocolNumber, rs, err))
+ }
+
+ if v < rs.Min {
+ v = rs.Min
+ }
+ if v > rs.Max {
+ v = rs.Max
+ }
+
+ e.mu.Lock()
+ e.rcvBufSizeMax = v
+ e.mu.Unlock()
+ return nil
case tcpip.SendBufferSizeOption:
+ // Make sure the send buffer size is within the min and max
+ // allowed.
+ var ss tcpip.StackSendBufferSizeOption
+ if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err != nil {
+ panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %+v) = %s", ProtocolNumber, ss, err))
+ }
+
+ if v < ss.Min {
+ v = ss.Min
+ }
+ if v > ss.Max {
+ v = ss.Max
+ }
+ e.mu.Lock()
+ e.sndBufSizeMax = v
+ e.mu.Unlock()
+ return nil
}
return nil
@@ -861,7 +912,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
case tcpip.SendBufferSizeOption:
e.mu.Lock()
- v := e.sndBufSize
+ v := e.sndBufSizeMax
e.mu.Unlock()
return v, nil
@@ -1299,6 +1350,24 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
return
}
+ // Verify checksum unless RX checksum offload is enabled.
+ // On IPv4, the UDP checksum is optional, and a zero value means
+ // the transmitter did not compute one (RFC 768).
+ // On IPv6, the UDP checksum is not optional (RFC 2460 Section 8.1).
+ if r.Capabilities()&stack.CapabilityRXChecksumOffload == 0 &&
+ (hdr.Checksum() != 0 || r.NetProto == header.IPv6ProtocolNumber) {
+ xsum := r.PseudoHeaderChecksum(ProtocolNumber, hdr.Length())
+ for _, v := range pkt.Data.Views() {
+ xsum = header.Checksum(v, xsum)
+ }
+ if hdr.CalculateChecksum(xsum) != 0xffff {
+ // Checksum Error.
+ e.stack.Stats().UDP.ChecksumErrors.Increment()
+ e.stats.ReceiveErrors.ChecksumErrors.Increment()
+ return
+ }
+ }
+
e.rcvMu.Lock()
e.stack.Stats().UDP.PacketsReceived.Increment()
e.stats.PacketsReceived.Increment()
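
Note: the receive-path check added above follows RFC 768 (a zero checksum on IPv4 means the sender skipped it) and RFC 2460 Section 8.1 (the checksum is mandatory on IPv6). A condensed sketch of the same logic as a standalone helper; the function name and parameters are illustrative only, while header.Checksum and header.UDP.CalculateChecksum are the helpers actually used in the hunk:

```go
package udpcheck

import (
	"gvisor.dev/gvisor/pkg/tcpip"
	"gvisor.dev/gvisor/pkg/tcpip/buffer"
	"gvisor.dev/gvisor/pkg/tcpip/header"
)

// checksumOK mirrors the verification added to HandlePacket: a zero checksum
// is accepted on IPv4 but not on IPv6; otherwise the ones-complement sum over
// the pseudo-header, UDP header and payload must fold to 0xffff.
func checksumOK(netProto tcpip.NetworkProtocolNumber, pseudoHeaderXsum uint16, hdr header.UDP, payload []buffer.View) bool {
	if hdr.Checksum() == 0 && netProto != header.IPv6ProtocolNumber {
		// IPv4 sender omitted the checksum; nothing to verify.
		return true
	}
	xsum := pseudoHeaderXsum
	for _, v := range payload {
		xsum = header.Checksum(v, xsum)
	}
	// Including the stored checksum, a valid packet folds to 0xffff.
	return hdr.CalculateChecksum(xsum) == 0xffff
}
```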
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index 4218e7d03..fc93f93c0 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -21,6 +21,7 @@
package udp
import (
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -32,9 +33,27 @@ import (
const (
// ProtocolNumber is the udp protocol number.
ProtocolNumber = header.UDPProtocolNumber
+
+ // MinBufferSize is the smallest size of a receive or send buffer.
+ MinBufferSize = 4 << 10 // 4KiB bytes.
+
+ // DefaultSendBufferSize is the default size of the send buffer for
+ // an endpoint.
+ DefaultSendBufferSize = 32 << 10 // 32KiB
+
+ // DefaultReceiveBufferSize is the default size of the receive buffer
+ // for an endpoint.
+ DefaultReceiveBufferSize = 32 << 10 // 32KiB
+
+ // MaxBufferSize is the largest size a receive/send buffer can grow to.
+ MaxBufferSize = 4 << 20 // 4MiB
)
-type protocol struct{}
+type protocol struct {
+ mu sync.RWMutex
+ sendBufferSize tcpip.StackSendBufferSizeOption
+ recvBufferSize tcpip.StackReceiveBufferSizeOption
+}
// Number returns the udp protocol number.
func (*protocol) Number() tcpip.TransportProtocolNumber {
@@ -183,13 +202,49 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
}
// SetOption implements stack.TransportProtocol.SetOption.
-func (*protocol) SetOption(option interface{}) *tcpip.Error {
- return tcpip.ErrUnknownProtocolOption
+func (p *protocol) SetOption(option interface{}) *tcpip.Error {
+ switch v := option.(type) {
+ case tcpip.StackSendBufferSizeOption:
+ if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
+ return tcpip.ErrInvalidOptionValue
+ }
+ p.mu.Lock()
+ p.sendBufferSize = v
+ p.mu.Unlock()
+ return nil
+
+ case tcpip.StackReceiveBufferSizeOption:
+ if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
+ return tcpip.ErrInvalidOptionValue
+ }
+ p.mu.Lock()
+ p.recvBufferSize = v
+ p.mu.Unlock()
+ return nil
+
+ default:
+ return tcpip.ErrUnknownProtocolOption
+ }
}
// Option implements stack.TransportProtocol.Option.
-func (*protocol) Option(option interface{}) *tcpip.Error {
- return tcpip.ErrUnknownProtocolOption
+func (p *protocol) Option(option interface{}) *tcpip.Error {
+ switch v := option.(type) {
+ case *tcpip.StackSendBufferSizeOption:
+ p.mu.RLock()
+ *v = p.sendBufferSize
+ p.mu.RUnlock()
+ return nil
+
+ case *tcpip.StackReceiveBufferSizeOption:
+ p.mu.RLock()
+ *v = p.recvBufferSize
+ p.mu.RUnlock()
+ return nil
+
+ default:
+ return tcpip.ErrUnknownProtocolOption
+ }
}
// Close implements stack.TransportProtocol.Close.
@@ -212,5 +267,8 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
// NewProtocol returns a UDP transport protocol.
func NewProtocol() stack.TransportProtocol {
- return &protocol{}
+ return &protocol{
+ sendBufferSize: tcpip.StackSendBufferSizeOption{Min: MinBufferSize, Default: DefaultSendBufferSize, Max: MaxBufferSize},
+ recvBufferSize: tcpip.StackReceiveBufferSizeOption{Min: MinBufferSize, Default: DefaultReceiveBufferSize, Max: MaxBufferSize},
+ }
}
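
Note: combined with the endpoint changes above, UDP buffer-size requests are now clamped to these stack-wide limits rather than taken verbatim. A rough usage sketch; the stack and endpoint setup is the usual boilerplate and not part of this change:

```go
package main

import (
	"log"

	"gvisor.dev/gvisor/pkg/tcpip"
	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
	"gvisor.dev/gvisor/pkg/tcpip/stack"
	"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
	"gvisor.dev/gvisor/pkg/waiter"
)

func main() {
	s := stack.New(stack.Options{
		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol()},
		TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
	})

	var wq waiter.Queue
	ep, err := s.NewEndpoint(udp.ProtocolNumber, ipv4.ProtocolNumber, &wq)
	if err != nil {
		log.Fatalf("NewEndpoint: %s", err)
	}
	defer ep.Close()

	// Ask for far more than MaxBufferSize; the endpoint clamps the request
	// to the stack-wide limits configured in udp/protocol.go above.
	if err := ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, 64<<20); err != nil {
		log.Fatalf("SetSockOptInt: %s", err)
	}
	if got, err := ep.GetSockOptInt(tcpip.ReceiveBufferSizeOption); err == nil {
		log.Printf("receive buffer size: %d (clamped to MaxBufferSize)", got)
	}
}
```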
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index 313a3f117..ff9f60cf9 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -292,15 +292,15 @@ func newDualTestContextWithOptions(t *testing.T, mtu uint32, options stack.Optio
wep = sniffer.New(ep)
}
if err := s.CreateNIC(1, wep); err != nil {
- t.Fatalf("CreateNIC failed: %v", err)
+ t.Fatalf("CreateNIC failed: %s", err)
}
if err := s.AddAddress(1, ipv4.ProtocolNumber, stackAddr); err != nil {
- t.Fatalf("AddAddress failed: %v", err)
+ t.Fatalf("AddAddress failed: %s", err)
}
if err := s.AddAddress(1, ipv6.ProtocolNumber, stackV6Addr); err != nil {
- t.Fatalf("AddAddress failed: %v", err)
+ t.Fatalf("AddAddress failed: %s", err)
}
s.SetRouteTable([]tcpip.Route{
@@ -391,17 +391,21 @@ func (c *testContext) injectPacket(flow testFlow, payload []byte) {
h := flow.header4Tuple(incoming)
if flow.isV4() {
- c.injectV4Packet(payload, &h, true /* valid */)
+ buf := c.buildV4Packet(payload, &h)
+ c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
+ Data: buf.ToVectorisedView(),
+ })
} else {
- c.injectV6Packet(payload, &h, true /* valid */)
+ buf := c.buildV6Packet(payload, &h)
+ c.linkEP.InjectInbound(ipv6.ProtocolNumber, &stack.PacketBuffer{
+ Data: buf.ToVectorisedView(),
+ })
}
}
-// injectV6Packet creates a V6 test packet with the given payload and header
-// values, and injects it into the link endpoint. valid indicates if the
-// caller intends to inject a packet with a valid or an invalid UDP header.
-// We can invalidate the header by corrupting the UDP payload length.
-func (c *testContext) injectV6Packet(payload []byte, h *header4Tuple, valid bool) {
+// buildV6Packet creates a V6 test packet with the given payload and header
+// values in a buffer.
+func (c *testContext) buildV6Packet(payload []byte, h *header4Tuple) buffer.View {
// Allocate a buffer for data and headers.
buf := buffer.NewView(header.UDPMinimumSize + header.IPv6MinimumSize + len(payload))
payloadStart := len(buf) - len(payload)
@@ -420,16 +424,10 @@ func (c *testContext) injectV6Packet(payload []byte, h *header4Tuple, valid bool
// Initialize the UDP header.
u := header.UDP(buf[header.IPv6MinimumSize:])
- l := uint16(header.UDPMinimumSize + len(payload))
- if !valid {
- // Change the UDP payload length to corrupt the header
- // as requested by the caller.
- l++
- }
u.Encode(&header.UDPFields{
SrcPort: h.srcAddr.Port,
DstPort: h.dstAddr.Port,
- Length: l,
+ Length: uint16(header.UDPMinimumSize + len(payload)),
})
// Calculate the UDP pseudo-header checksum.
@@ -439,17 +437,12 @@ func (c *testContext) injectV6Packet(payload []byte, h *header4Tuple, valid bool
xsum = header.Checksum(payload, xsum)
u.SetChecksum(^u.CalculateChecksum(xsum))
- // Inject packet.
- c.linkEP.InjectInbound(ipv6.ProtocolNumber, &stack.PacketBuffer{
- Data: buf.ToVectorisedView(),
- })
+ return buf
}
-// injectV4Packet creates a V4 test packet with the given payload and header
-// values, and injects it into the link endpoint. valid indicates if the
-// caller intends to inject a packet with a valid or an invalid UDP header.
-// We can invalidate the header by corrupting the UDP payload length.
-func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple, valid bool) {
+// buildV4Packet creates a V4 test packet with the given payload and header
+// values in a buffer.
+func (c *testContext) buildV4Packet(payload []byte, h *header4Tuple) buffer.View {
// Allocate a buffer for data and headers.
buf := buffer.NewView(header.UDPMinimumSize + header.IPv4MinimumSize + len(payload))
payloadStart := len(buf) - len(payload)
@@ -483,11 +476,7 @@ func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple, valid bool
xsum = header.Checksum(payload, xsum)
u.SetChecksum(^u.CalculateChecksum(xsum))
- // Inject packet.
-
- c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
- Data: buf.ToVectorisedView(),
- })
+ return buf
}
func newPayload() []byte {
@@ -509,7 +498,7 @@ func TestBindToDeviceOption(t *testing.T) {
ep, err := s.NewEndpoint(udp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
- t.Fatalf("NewEndpoint failed; %v", err)
+ t.Fatalf("NewEndpoint failed; %s", err)
}
defer ep.Close()
@@ -643,7 +632,7 @@ func TestBindEphemeralPort(t *testing.T) {
c.createEndpoint(ipv6.ProtocolNumber)
if err := c.ep.Bind(tcpip.FullAddress{}); err != nil {
- t.Fatalf("ep.Bind(...) failed: %v", err)
+ t.Fatalf("ep.Bind(...) failed: %s", err)
}
}
@@ -654,19 +643,19 @@ func TestBindReservedPort(t *testing.T) {
c.createEndpoint(ipv6.ProtocolNumber)
if err := c.ep.Connect(tcpip.FullAddress{Addr: testV6Addr, Port: testPort}); err != nil {
- c.t.Fatalf("Connect failed: %v", err)
+ c.t.Fatalf("Connect failed: %s", err)
}
addr, err := c.ep.GetLocalAddress()
if err != nil {
- t.Fatalf("GetLocalAddress failed: %v", err)
+ t.Fatalf("GetLocalAddress failed: %s", err)
}
// We can't bind the address reserved by the connected endpoint above.
{
ep, err := c.s.NewEndpoint(udp.ProtocolNumber, ipv6.ProtocolNumber, &c.wq)
if err != nil {
- t.Fatalf("NewEndpoint failed: %v", err)
+ t.Fatalf("NewEndpoint failed: %s", err)
}
defer ep.Close()
if got, want := ep.Bind(addr), tcpip.ErrPortInUse; got != want {
@@ -677,7 +666,7 @@ func TestBindReservedPort(t *testing.T) {
func() {
ep, err := c.s.NewEndpoint(udp.ProtocolNumber, ipv4.ProtocolNumber, &c.wq)
if err != nil {
- t.Fatalf("NewEndpoint failed: %v", err)
+ t.Fatalf("NewEndpoint failed: %s", err)
}
defer ep.Close()
// We can't bind ipv4-any on the port reserved by the connected endpoint
@@ -687,7 +676,7 @@ func TestBindReservedPort(t *testing.T) {
}
// We can bind an ipv4 address on this port, though.
if err := ep.Bind(tcpip.FullAddress{Addr: stackAddr, Port: addr.Port}); err != nil {
- t.Fatalf("ep.Bind(...) failed: %v", err)
+ t.Fatalf("ep.Bind(...) failed: %s", err)
}
}()
@@ -697,11 +686,11 @@ func TestBindReservedPort(t *testing.T) {
func() {
ep, err := c.s.NewEndpoint(udp.ProtocolNumber, ipv4.ProtocolNumber, &c.wq)
if err != nil {
- t.Fatalf("NewEndpoint failed: %v", err)
+ t.Fatalf("NewEndpoint failed: %s", err)
}
defer ep.Close()
if err := ep.Bind(tcpip.FullAddress{Port: addr.Port}); err != nil {
- t.Fatalf("ep.Bind(...) failed: %v", err)
+ t.Fatalf("ep.Bind(...) failed: %s", err)
}
}()
}
@@ -714,7 +703,7 @@ func TestV4ReadOnV6(t *testing.T) {
// Bind to wildcard.
if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
// Test acceptance.
@@ -729,7 +718,7 @@ func TestV4ReadOnBoundToV4MappedWildcard(t *testing.T) {
// Bind to v4 mapped wildcard.
if err := c.ep.Bind(tcpip.FullAddress{Addr: v4MappedWildcardAddr, Port: stackPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
// Test acceptance.
@@ -744,7 +733,7 @@ func TestV4ReadOnBoundToV4Mapped(t *testing.T) {
// Bind to local address.
if err := c.ep.Bind(tcpip.FullAddress{Addr: stackV4MappedAddr, Port: stackPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
// Test acceptance.
@@ -759,7 +748,7 @@ func TestV6ReadOnV6(t *testing.T) {
// Bind to wildcard.
if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
// Test acceptance.
@@ -796,7 +785,10 @@ func TestV4ReadSelfSource(t *testing.T) {
h := unicastV4.header4Tuple(incoming)
h.srcAddr = h.dstAddr
- c.injectV4Packet(payload, &h, true /* valid */)
+ buf := c.buildV4Packet(payload, &h)
+ c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
+ Data: buf.ToVectorisedView(),
+ })
if got := c.s.Stats().IP.InvalidSourceAddressesReceived.Value(); got != tt.wantInvalidSource {
t.Errorf("c.s.Stats().IP.InvalidSourceAddressesReceived got %d, want %d", got, tt.wantInvalidSource)
@@ -817,7 +809,7 @@ func TestV4ReadOnV4(t *testing.T) {
// Bind to wildcard.
if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
// Test acceptance.
@@ -955,7 +947,7 @@ func testWriteInternal(c *testContext, flow testFlow, setDest bool, checkers ...
payload := buffer.View(newPayload())
n, _, err := c.ep.Write(tcpip.SlicePayload(payload), writeOpts)
if err != nil {
- c.t.Fatalf("Write failed: %v", err)
+ c.t.Fatalf("Write failed: %s", err)
}
if n != int64(len(payload)) {
c.t.Fatalf("Bad number of bytes written: got %v, want %v", n, len(payload))
@@ -1005,7 +997,7 @@ func TestDualWriteBoundToWildcard(t *testing.T) {
// Bind to wildcard.
if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
p := testDualWrite(c)
@@ -1022,7 +1014,7 @@ func TestDualWriteConnectedToV6(t *testing.T) {
// Connect to v6 address.
if err := c.ep.Connect(tcpip.FullAddress{Addr: testV6Addr, Port: testPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
testWrite(c, unicastV6)
@@ -1043,7 +1035,7 @@ func TestDualWriteConnectedToV4Mapped(t *testing.T) {
// Connect to v4 mapped address.
if err := c.ep.Connect(tcpip.FullAddress{Addr: testV4MappedAddr, Port: testPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
testWrite(c, unicastV4in6)
@@ -1070,7 +1062,7 @@ func TestV6WriteOnBoundToV4Mapped(t *testing.T) {
// Bind to v4 mapped address.
if err := c.ep.Bind(tcpip.FullAddress{Addr: stackV4MappedAddr, Port: stackPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
// Write to v6 address.
@@ -1085,7 +1077,7 @@ func TestV6WriteOnConnected(t *testing.T) {
// Connect to v6 address.
if err := c.ep.Connect(tcpip.FullAddress{Addr: testV6Addr, Port: testPort}); err != nil {
- c.t.Fatalf("Connect failed: %v", err)
+ c.t.Fatalf("Connect failed: %s", err)
}
testWriteWithoutDestination(c, unicastV6)
@@ -1099,7 +1091,7 @@ func TestV4WriteOnConnected(t *testing.T) {
// Connect to v4 mapped address.
if err := c.ep.Connect(tcpip.FullAddress{Addr: testV4MappedAddr, Port: testPort}); err != nil {
- c.t.Fatalf("Connect failed: %v", err)
+ c.t.Fatalf("Connect failed: %s", err)
}
testWriteWithoutDestination(c, unicastV4)
@@ -1234,7 +1226,7 @@ func TestReadIncrementsPacketsReceived(t *testing.T) {
// Bind to wildcard.
if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
testRead(c, unicastV4)
@@ -1506,12 +1498,12 @@ func TestMulticastInterfaceOption(t *testing.T) {
Port: stackPort,
}
if err := c.ep.Connect(addr); err != nil {
- c.t.Fatalf("Connect failed: %v", err)
+ c.t.Fatalf("Connect failed: %s", err)
}
}
if err := c.ep.SetSockOpt(ifoptSet); err != nil {
- c.t.Fatalf("SetSockOpt failed: %v", err)
+ c.t.Fatalf("SetSockOpt failed: %s", err)
}
// Verify multicast interface addr and NIC were set correctly.
@@ -1519,7 +1511,7 @@ func TestMulticastInterfaceOption(t *testing.T) {
ifoptWant := tcpip.MulticastInterfaceOption{NIC: 1, InterfaceAddr: ifoptSet.InterfaceAddr}
var ifoptGot tcpip.MulticastInterfaceOption
if err := c.ep.GetSockOpt(&ifoptGot); err != nil {
- c.t.Fatalf("GetSockOpt failed: %v", err)
+ c.t.Fatalf("GetSockOpt failed: %s", err)
}
if ifoptGot != ifoptWant {
c.t.Errorf("got GetSockOpt() = %#v, want = %#v", ifoptGot, ifoptWant)
@@ -1691,7 +1683,7 @@ func TestV6UnknownDestination(t *testing.T) {
}
// TestIncrementMalformedPacketsReceived verifies if the malformed received
-// global and endpoint stats get incremented.
+// global and endpoint stats are incremented.
func TestIncrementMalformedPacketsReceived(t *testing.T) {
c := newDualTestContext(t, defaultMTU)
defer c.cleanup()
@@ -1699,20 +1691,25 @@ func TestIncrementMalformedPacketsReceived(t *testing.T) {
c.createEndpoint(ipv6.ProtocolNumber)
// Bind to wildcard.
if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
payload := newPayload()
- c.t.Helper()
h := unicastV6.header4Tuple(incoming)
- c.injectV6Packet(payload, &h, false /* !valid */)
+ buf := c.buildV6Packet(payload, &h)
+ // Invalidate the packet length field in the UDP header by adding one.
+ u := header.UDP(buf[header.IPv6MinimumSize:])
+ u.SetLength(u.Length() + 1)
+ c.linkEP.InjectInbound(ipv6.ProtocolNumber, &stack.PacketBuffer{
+ Data: buf.ToVectorisedView(),
+ })
- var want uint64 = 1
+ const want = 1
if got := c.s.Stats().UDP.MalformedPacketsReceived.Value(); got != want {
- t.Errorf("got stats.UDP.MalformedPacketsReceived.Value() = %v, want = %v", got, want)
+ t.Errorf("got stats.UDP.MalformedPacketsReceived.Value() = %d, want = %d", got, want)
}
if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.MalformedPacketsReceived.Value(); got != want {
- t.Errorf("got EP Stats.ReceiveErrors.MalformedPacketsReceived stats = %v, want = %v", got, want)
+ t.Errorf("got EP Stats.ReceiveErrors.MalformedPacketsReceived stats = %d, want = %d", got, want)
}
}
@@ -1728,7 +1725,6 @@ func TestShortHeader(t *testing.T) {
c.t.Fatalf("Bind failed: %s", err)
}
- c.t.Helper()
h := unicastV6.header4Tuple(incoming)
// Allocate a buffer for an IPv6 and too-short UDP header.
@@ -1768,6 +1764,190 @@ func TestShortHeader(t *testing.T) {
}
}
+// TestIncrementChecksumErrorsV4 verifies that when a checksum error is
+// detected, the global and endpoint stats are incremented.
+func TestIncrementChecksumErrorsV4(t *testing.T) {
+ c := newDualTestContext(t, defaultMTU)
+ defer c.cleanup()
+
+ c.createEndpoint(ipv4.ProtocolNumber)
+ // Bind to wildcard.
+ if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
+ c.t.Fatalf("Bind failed: %s", err)
+ }
+
+ payload := newPayload()
+ h := unicastV4.header4Tuple(incoming)
+ buf := c.buildV4Packet(payload, &h)
+ // Invalidate the checksum field in the UDP header by adding one.
+ u := header.UDP(buf[header.IPv4MinimumSize:])
+ u.SetChecksum(u.Checksum() + 1)
+ c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
+ Data: buf.ToVectorisedView(),
+ })
+
+ const want = 1
+ if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want {
+ t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
+ }
+ if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want {
+ t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want)
+ }
+}
+
+// TestIncrementChecksumErrorsV6 verifies that when a checksum error is
+// detected, the global and endpoint stats are incremented.
+func TestIncrementChecksumErrorsV6(t *testing.T) {
+ c := newDualTestContext(t, defaultMTU)
+ defer c.cleanup()
+
+ c.createEndpoint(ipv6.ProtocolNumber)
+ // Bind to wildcard.
+ if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
+ c.t.Fatalf("Bind failed: %s", err)
+ }
+
+ payload := newPayload()
+ h := unicastV6.header4Tuple(incoming)
+ buf := c.buildV6Packet(payload, &h)
+ // Invalidate the checksum field in the UDP header by adding one.
+ u := header.UDP(buf[header.IPv6MinimumSize:])
+ u.SetChecksum(u.Checksum() + 1)
+ c.linkEP.InjectInbound(ipv6.ProtocolNumber, &stack.PacketBuffer{
+ Data: buf.ToVectorisedView(),
+ })
+
+ const want = 1
+ if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want {
+ t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
+ }
+ if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want {
+ t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want)
+ }
+}
+
+// TestPayloadModifiedV4 verifies that a modified payload is detected as a
+// checksum error and that the global and endpoint stats are incremented.
+func TestPayloadModifiedV4(t *testing.T) {
+ c := newDualTestContext(t, defaultMTU)
+ defer c.cleanup()
+
+ c.createEndpoint(ipv4.ProtocolNumber)
+ // Bind to wildcard.
+ if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
+ c.t.Fatalf("Bind failed: %s", err)
+ }
+
+ payload := newPayload()
+ h := unicastV4.header4Tuple(incoming)
+ buf := c.buildV4Packet(payload, &h)
+ // Modify the payload so that the checksum value in the UDP header will be incorrect.
+ buf[len(buf)-1]++
+ c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
+ Data: buf.ToVectorisedView(),
+ })
+
+ const want = 1
+ if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want {
+ t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
+ }
+ if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want {
+ t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want)
+ }
+}
+
+// TestPayloadModifiedV6 verifies that a modified payload is detected as a
+// checksum error and that the global and endpoint stats are incremented.
+func TestPayloadModifiedV6(t *testing.T) {
+ c := newDualTestContext(t, defaultMTU)
+ defer c.cleanup()
+
+ c.createEndpoint(ipv6.ProtocolNumber)
+ // Bind to wildcard.
+ if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
+ c.t.Fatalf("Bind failed: %s", err)
+ }
+
+ payload := newPayload()
+ h := unicastV6.header4Tuple(incoming)
+ buf := c.buildV6Packet(payload, &h)
+ // Modify the payload so that the checksum value in the UDP header will be incorrect.
+ buf[len(buf)-1]++
+ c.linkEP.InjectInbound(ipv6.ProtocolNumber, &stack.PacketBuffer{
+ Data: buf.ToVectorisedView(),
+ })
+
+ const want = 1
+ if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want {
+ t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
+ }
+ if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want {
+ t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want)
+ }
+}
+
+// TestChecksumZeroV4 verifies that when the checksum value is zero, the global
+// and endpoint stats are *not* incremented (UDP checksum is optional on IPv4).
+func TestChecksumZeroV4(t *testing.T) {
+ c := newDualTestContext(t, defaultMTU)
+ defer c.cleanup()
+
+ c.createEndpoint(ipv4.ProtocolNumber)
+ // Bind to wildcard.
+ if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
+ c.t.Fatalf("Bind failed: %s", err)
+ }
+
+ payload := newPayload()
+ h := unicastV4.header4Tuple(incoming)
+ buf := c.buildV4Packet(payload, &h)
+ // Set the checksum field in the UDP header to zero.
+ u := header.UDP(buf[header.IPv4MinimumSize:])
+ u.SetChecksum(0)
+ c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
+ Data: buf.ToVectorisedView(),
+ })
+
+ const want = 0
+ if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want {
+ t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
+ }
+ if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want {
+ t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want)
+ }
+}
+
+// TestChecksumZeroV6 verifies that when the checksum value is zero, the global
+// and endpoint stats are incremented (UDP checksum is *not* optional on IPv6).
+func TestChecksumZeroV6(t *testing.T) {
+ c := newDualTestContext(t, defaultMTU)
+ defer c.cleanup()
+
+ c.createEndpoint(ipv6.ProtocolNumber)
+ // Bind to wildcard.
+ if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
+ c.t.Fatalf("Bind failed: %s", err)
+ }
+
+ payload := newPayload()
+ h := unicastV6.header4Tuple(incoming)
+ buf := c.buildV6Packet(payload, &h)
+ // Set the checksum field in the UDP header to zero.
+ u := header.UDP(buf[header.IPv6MinimumSize:])
+ u.SetChecksum(0)
+ c.linkEP.InjectInbound(ipv6.ProtocolNumber, &stack.PacketBuffer{
+ Data: buf.ToVectorisedView(),
+ })
+
+ const want = 1
+ if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want {
+ t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
+ }
+ if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want {
+ t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want)
+ }
+}
+
// TestShutdownRead verifies endpoint read shutdown and error
// stats increment on packet receive.
func TestShutdownRead(t *testing.T) {
@@ -1778,15 +1958,15 @@ func TestShutdownRead(t *testing.T) {
// Bind to wildcard.
if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
- c.t.Fatalf("Bind failed: %v", err)
+ c.t.Fatalf("Bind failed: %s", err)
}
if err := c.ep.Connect(tcpip.FullAddress{Addr: testV6Addr, Port: testPort}); err != nil {
- c.t.Fatalf("Connect failed: %v", err)
+ c.t.Fatalf("Connect failed: %s", err)
}
if err := c.ep.Shutdown(tcpip.ShutdownRead); err != nil {
- t.Fatalf("Shutdown failed: %v", err)
+ t.Fatalf("Shutdown failed: %s", err)
}
testFailingRead(c, unicastV6, true /* expectReadError */)
@@ -1809,11 +1989,11 @@ func TestShutdownWrite(t *testing.T) {
c.createEndpoint(ipv6.ProtocolNumber)
if err := c.ep.Connect(tcpip.FullAddress{Addr: testV6Addr, Port: testPort}); err != nil {
- c.t.Fatalf("Connect failed: %v", err)
+ c.t.Fatalf("Connect failed: %s", err)
}
if err := c.ep.Shutdown(tcpip.ShutdownWrite); err != nil {
- t.Fatalf("Shutdown failed: %v", err)
+ t.Fatalf("Shutdown failed: %s", err)
}
testFailingWrite(c, unicastV6, tcpip.ErrClosedForSend)
diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go
index c45d2ecbc..819dd0a59 100644
--- a/pkg/test/dockerutil/dockerutil.go
+++ b/pkg/test/dockerutil/dockerutil.go
@@ -210,7 +210,6 @@ type Docker struct {
Runtime string
Name string
copyErr error
- mounts []string
cleanups []func()
}
@@ -229,13 +228,8 @@ func MakeDocker(logger testutil.Logger) *Docker {
}
}
-// Mount mounts the given source and makes it available in the container.
-func (d *Docker) Mount(target, source string, mode MountMode) {
- d.mounts = append(d.mounts, fmt.Sprintf("-v=%s:%s:%v", source, target, mode))
-}
-
// CopyFiles copies in and mounts the given files. They are always ReadOnly.
-func (d *Docker) CopyFiles(target string, sources ...string) {
+func (d *Docker) CopyFiles(opts *RunOpts, targetDir string, sources ...string) {
dir, err := ioutil.TempDir("", d.Name)
if err != nil {
d.copyErr = fmt.Errorf("ioutil.TempDir failed: %v", err)
@@ -259,12 +253,33 @@ func (d *Docker) CopyFiles(target string, sources ...string) {
}
d.logger.Logf("copy: %s -> %s", src, dst)
}
- d.Mount(target, dir, ReadOnly)
+ opts.Mounts = append(opts.Mounts, Mount{
+ Source: dir,
+ Target: targetDir,
+ Mode: ReadOnly,
+ })
+}
+
+// Mount describes a mount point inside the container.
+type Mount struct {
+ // Source is the path outside the container.
+ Source string
+
+ // Target is the path inside the container.
+ Target string
+
+ // Mode tells whether the mount inside the container should be readonly.
+ Mode MountMode
}
-// Link links the given target.
-func (d *Docker) Link(target string, source *Docker) {
- d.mounts = append(d.mounts, fmt.Sprintf("--link=%s:%s", source.Name, target))
+// Link informs dockers that a given container needs to be made accessible from
+// the container being configured.
+type Link struct {
+ // Source is the container to connect to.
+ Source *Docker
+
+ // Target is the alias for the container.
+ Target string
}
// RunOpts are options for running a container.
@@ -310,6 +325,12 @@ type RunOpts struct {
// return value from the Run function.
Foreground bool
+ // Mounts is the list of directories/files to be mounted inside the container.
+ Mounts []Mount
+
+ // Links is the list of containers to be connected to the container.
+ Links []Link
+
// Extra are extra arguments that may be passed.
Extra []string
}
@@ -368,7 +389,13 @@ func (d *Docker) argsFor(r *RunOpts, command string, p []string) (rv []string) {
if isExec {
rv = append(rv, d.Name)
} else {
- rv = append(rv, d.mounts...)
+ for _, m := range r.Mounts {
+ rv = append(rv, fmt.Sprintf("-v=%s:%s:%v", m.Source, m.Target, m.Mode))
+ }
+ for _, l := range r.Links {
+ rv = append(rv, fmt.Sprintf("--link=%s:%s", l.Source.Name, l.Target))
+ }
+
if len(d.Runtime) > 0 {
rv = append(rv, fmt.Sprintf("--runtime=%s", d.Runtime))
}
@@ -501,8 +528,6 @@ func (d *Docker) CleanUp() {
if err := d.Remove(); err != nil {
d.logger.Logf("error removing container %q: %v", d.Name, err)
}
- // Forget all mounts.
- d.mounts = nil
// Execute all cleanups.
for _, c := range d.cleanups {
c()
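
Note: mounts and links are now plain data on RunOpts instead of state accumulated on the Docker object. A hypothetical caller after this change might look like the following sketch; the image name, paths, and the second container are placeholders, not values taken from this diff:

```go
package example

import (
	"testing"

	"gvisor.dev/gvisor/pkg/test/dockerutil"
)

func TestRunOptsExample(t *testing.T) {
	d := dockerutil.MakeDocker(t)
	defer d.CleanUp()

	// A second container to link against (placeholder).
	server := dockerutil.MakeDocker(t)
	defer server.CleanUp()

	opts := dockerutil.RunOpts{
		Image: "basic/alpine", // placeholder image
		Mounts: []dockerutil.Mount{
			{Source: "/tmp/host-dir", Target: "/data", Mode: dockerutil.ReadOnly},
		},
		Links: []dockerutil.Link{
			{Source: server, Target: "db"},
		},
	}
	// CopyFiles now appends a read-only mount to opts instead of mutating d.
	d.CopyFiles(&opts, "/files", "path/to/file.txt")

	out, err := d.Run(opts, "ls", "/data")
	if err != nil {
		t.Fatalf("docker run failed: %v", err)
	}
	t.Logf("output: %s", out)
}
```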
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index b05a8bd45..081db39c1 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -1058,8 +1058,8 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in
})}
// Enable SACK Recovery.
- if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
- return nil, fmt.Errorf("failed to enable SACK: %v", err)
+ if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(true)); err != nil {
+ return nil, fmt.Errorf("failed to enable SACK: %s", err)
}
// Set default TTLs as required by socket/netstack.
@@ -1068,11 +1068,9 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in
// Enable Receive Buffer Auto-Tuning.
if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
- return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
+ return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err)
}
- s.FillIPTablesMetadata()
-
return &s, nil
}
diff --git a/scripts/iptables_tests.sh b/scripts/iptables_tests.sh
index 2a8c24907..8299a7c8b 100755
--- a/scripts/iptables_tests.sh
+++ b/scripts/iptables_tests.sh
@@ -18,6 +18,9 @@ source $(dirname $0)/common.sh
make load-iptables
+# Needed by ip6tables.
+sudo modprobe ip6table_filter
+
install_runsc_for_test iptables --net-raw
test //test/iptables:iptables_test "--test_arg=--runtime=runc"
test //test/iptables:iptables_test "--test_arg=--runtime=${RUNTIME}"
diff --git a/test/README.md b/test/README.md
index 97fe7ea04..02bbf42ff 100644
--- a/test/README.md
+++ b/test/README.md
@@ -24,11 +24,11 @@ also used to run these tests in `kokoro`.
To run image and integration tests, run:
-`./scripts/docker_test.sh`
+`./scripts/docker_tests.sh`
To run root tests, run:
-`./scripts/root_test.sh`
+`./scripts/root_tests.sh`
There are a few other interesting variations for image and integration tests:
diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go
index 91c956e10..60e739c6a 100644
--- a/test/e2e/integration_test.go
+++ b/test/e2e/integration_test.go
@@ -24,10 +24,12 @@ package integration
import (
"flag"
"fmt"
+ "io/ioutil"
"net"
"net/http"
"os"
"os/exec"
+ "path/filepath"
"strconv"
"strings"
"syscall"
@@ -389,6 +391,37 @@ func TestTmpFile(t *testing.T) {
}
}
+// TestTmpMount checks that mounts inside '/tmp' are not overridden.
+func TestTmpMount(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "tmp-mount")
+ if err != nil {
+ t.Fatalf("TempDir(): %v", err)
+ }
+ want := "123"
+ if err := ioutil.WriteFile(filepath.Join(dir, "file.txt"), []byte("123"), 0666); err != nil {
+ t.Fatalf("WriteFile(): %v", err)
+ }
+ d := dockerutil.MakeDocker(t)
+ defer d.CleanUp()
+
+ opts := dockerutil.RunOpts{
+ Image: "basic/alpine",
+ Mounts: []dockerutil.Mount{
+ {
+ Source: dir,
+ Target: "/tmp/foo",
+ },
+ },
+ }
+ got, err := d.Run(opts, "cat", "/tmp/foo/file.txt")
+ if err != nil {
+ t.Fatalf("docker run failed: %v", err)
+ }
+ if want != got {
+ t.Errorf("invalid file content, want: %q, got: %q", want, got)
+ }
+}
+
// TestHostOverlayfsCopyUp tests that the --overlayfs-stale-read option causes
// runsc to hide the incoherence of FDs opened before and after overlayfs
// copy-up on the host.
diff --git a/test/image/image_test.go b/test/image/image_test.go
index 2e3543109..3e4321480 100644
--- a/test/image/image_test.go
+++ b/test/image/image_test.go
@@ -111,11 +111,12 @@ func TestHttpd(t *testing.T) {
defer d.CleanUp()
// Start the container.
- d.CopyFiles("/usr/local/apache2/htdocs", "test/image/latin10k.txt")
- if err := d.Spawn(dockerutil.RunOpts{
+ opts := dockerutil.RunOpts{
Image: "basic/httpd",
Ports: []int{80},
- }); err != nil {
+ }
+ d.CopyFiles(&opts, "/usr/local/apache2/htdocs", "test/image/latin10k.txt")
+ if err := d.Spawn(opts); err != nil {
t.Fatalf("docker run failed: %v", err)
}
@@ -138,11 +139,12 @@ func TestNginx(t *testing.T) {
defer d.CleanUp()
// Start the container.
- d.CopyFiles("/usr/share/nginx/html", "test/image/latin10k.txt")
- if err := d.Spawn(dockerutil.RunOpts{
+ opts := dockerutil.RunOpts{
Image: "basic/nginx",
Ports: []int{80},
- }); err != nil {
+ }
+ d.CopyFiles(&opts, "/usr/share/nginx/html", "test/image/latin10k.txt")
+ if err := d.Spawn(opts); err != nil {
t.Fatalf("docker run failed: %v", err)
}
@@ -183,11 +185,17 @@ func TestMysql(t *testing.T) {
// Tell mysql client to connect to the server and execute the file in
// verbose mode to verify the output.
- client.CopyFiles("/sql", "test/image/mysql.sql")
- client.Link("mysql", server)
- if _, err := client.Run(dockerutil.RunOpts{
+ opts := dockerutil.RunOpts{
Image: "basic/mysql",
- }, "mysql", "-hmysql", "-uroot", "-pfoobar123", "-v", "-e", "source /sql/mysql.sql"); err != nil {
+ Links: []dockerutil.Link{
+ {
+ Source: server,
+ Target: "mysql",
+ },
+ },
+ }
+ client.CopyFiles(&opts, "/sql", "test/image/mysql.sql")
+ if _, err := client.Run(opts, "mysql", "-hmysql", "-uroot", "-pfoobar123", "-v", "-e", "source /sql/mysql.sql"); err != nil {
t.Fatalf("docker run failed: %v", err)
}
@@ -236,11 +244,12 @@ func TestRuby(t *testing.T) {
defer d.CleanUp()
// Execute the ruby workload.
- d.CopyFiles("/src", "test/image/ruby.rb", "test/image/ruby.sh")
- if err := d.Spawn(dockerutil.RunOpts{
+ opts := dockerutil.RunOpts{
Image: "basic/ruby",
Ports: []int{8080},
- }, "/src/ruby.sh"); err != nil {
+ }
+ d.CopyFiles(&opts, "/src", "test/image/ruby.rb", "test/image/ruby.sh")
+ if err := d.Spawn(opts, "/src/ruby.sh"); err != nil {
t.Fatalf("docker run failed: %v", err)
}
diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go
index 38319a3b2..340f9426e 100644
--- a/test/iptables/iptables_test.go
+++ b/test/iptables/iptables_test.go
@@ -41,11 +41,12 @@ func singleTest(t *testing.T, test TestCase) {
defer d.CleanUp()
// Create and start the container.
- d.CopyFiles("/runner", "test/iptables/runner/runner")
- if err := d.Spawn(dockerutil.RunOpts{
+ opts := dockerutil.RunOpts{
Image: "iptables",
CapAdd: []string{"NET_ADMIN"},
- }, "/runner/runner", "-name", test.Name()); err != nil {
+ }
+ d.CopyFiles(&opts, "/runner", "test/iptables/runner/runner")
+ if err := d.Spawn(opts, "/runner/runner", "-name", test.Name()); err != nil {
t.Fatalf("docker run failed: %v", err)
}
diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go
index 75617f6de..3cac5915f 100644
--- a/test/packetimpact/runner/packetimpact_test.go
+++ b/test/packetimpact/runner/packetimpact_test.go
@@ -142,7 +142,7 @@ func TestOne(t *testing.T) {
}
const containerPosixServerBinary = "/packetimpact/posix_server"
- dut.CopyFiles("/packetimpact", "/test/packetimpact/dut/posix_server")
+ dut.CopyFiles(&runOpts, "/packetimpact", "/test/packetimpact/dut/posix_server")
if err := dut.Create(runOpts, containerPosixServerBinary, "--ip=0.0.0.0", "--port="+ctrlPort); err != nil {
t.Fatalf("unable to create container %s: %s", dut.Name, err)
@@ -193,7 +193,13 @@ func TestOne(t *testing.T) {
tbb := path.Base(*testbenchBinary)
containerTestbenchBinary := "/packetimpact/" + tbb
- testbench.CopyFiles("/packetimpact", "/test/packetimpact/tests/"+tbb)
+ runOpts = dockerutil.RunOpts{
+ Image: "packetimpact",
+ CapAdd: []string{"NET_ADMIN"},
+ Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm", "-v", tmpDir + ":" + testOutputDir},
+ Foreground: true,
+ }
+ testbench.CopyFiles(&runOpts, "/packetimpact", "/test/packetimpact/tests/"+tbb)
// Run tcpdump in the test bench unbuffered, without DNS resolution, just on
// the interface with the test packets.
diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl
index 5a83f8060..921e499be 100644
--- a/test/runner/defs.bzl
+++ b/test/runner/defs.bzl
@@ -195,7 +195,7 @@ def syscall_test(
shard_count = shard_count,
size = size,
platform = default_platform,
- use_tmpfs = False, # overlay is adding a writable tmpfs on top of root.
+ use_tmpfs = use_tmpfs,
add_uds_tree = add_uds_tree,
tags = platforms[default_platform] + tags,
overlay = True,
diff --git a/test/runner/runner.go b/test/runner/runner.go
index f8baf61b0..5456e46a6 100644
--- a/test/runner/runner.go
+++ b/test/runner/runner.go
@@ -391,12 +391,12 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) {
}
}
-// filterEnv returns an environment with the blacklisted variables removed.
-func filterEnv(env, blacklist []string) []string {
+// filterEnv returns an environment with the excluded variables removed.
+func filterEnv(env, exclude []string) []string {
var out []string
for _, kv := range env {
ok := true
- for _, k := range blacklist {
+ for _, k := range exclude {
if strings.HasPrefix(kv, k+"=") {
ok = false
break
diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go
index 7989dca84..54d1169ef 100644
--- a/test/runtimes/runner/main.go
+++ b/test/runtimes/runner/main.go
@@ -79,10 +79,11 @@ func runTests() int {
// getTests executes all tests as table tests.
func getTests(d *dockerutil.Docker, excludes map[string]struct{}) ([]testing.InternalTest, error) {
// Start the container.
- d.CopyFiles("/proctor", "test/runtimes/proctor/proctor")
- if err := d.Spawn(dockerutil.RunOpts{
+ opts := dockerutil.RunOpts{
Image: fmt.Sprintf("runtimes/%s", *image),
- }, "/proctor/proctor", "--pause"); err != nil {
+ }
+ d.CopyFiles(&opts, "/proctor", "test/runtimes/proctor/proctor")
+ if err := d.Spawn(opts, "/proctor/proctor", "--pause"); err != nil {
return nil, fmt.Errorf("docker run failed: %v", err)
}
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 6d80572e7..f94c383ae 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -33,6 +33,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:aio_test",
+ vfs2 = "True",
)
syscall_test(
@@ -56,6 +57,7 @@ syscall_test(
size = "large",
add_overlay = True,
test = "//test/syscalls/linux:bind_test",
+ vfs2 = "True",
)
syscall_test(
@@ -106,6 +108,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:chroot_test",
+ vfs2 = "True",
)
syscall_test(
@@ -178,6 +181,7 @@ syscall_test(
size = "medium",
add_overlay = True,
test = "//test/syscalls/linux:exec_binary_test",
+ vfs2 = "True",
)
syscall_test(
@@ -188,6 +192,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:fadvise64_test",
+ vfs2 = "True",
)
syscall_test(
@@ -215,6 +220,7 @@ syscall_test(
size = "medium",
add_overlay = True,
test = "//test/syscalls/linux:flock_test",
+ vfs2 = "True",
)
syscall_test(
@@ -235,6 +241,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:fsync_test",
+ vfs2 = "True",
)
syscall_test(
@@ -257,6 +264,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:getdents_test",
+ vfs2 = "True",
)
syscall_test(
@@ -302,6 +310,7 @@ syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:link_test",
use_tmpfs = True, # gofer needs CAP_DAC_READ_SEARCH to use AT_EMPTY_PATH with linkat(2)
+ vfs2 = "True",
)
syscall_test(
@@ -350,6 +359,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:mount_test",
+ vfs2 = "True",
)
syscall_test(
@@ -457,6 +467,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:preadv2_test",
+ vfs2 = "True",
)
syscall_test(
@@ -566,6 +577,7 @@ syscall_test(
size = "medium",
add_overlay = True,
test = "//test/syscalls/linux:rename_test",
+ vfs2 = "True",
)
syscall_test(
@@ -695,6 +707,7 @@ syscall_test(
size = "medium",
add_overlay = True,
test = "//test/syscalls/linux:socket_filesystem_non_blocking_test",
+ vfs2 = "True",
)
syscall_test(
@@ -702,12 +715,14 @@ syscall_test(
add_overlay = True,
shard_count = 50,
test = "//test/syscalls/linux:socket_filesystem_test",
+ vfs2 = "True",
)
syscall_test(
size = "large",
shard_count = 50,
test = "//test/syscalls/linux:socket_inet_loopback_test",
+ vfs2 = "True",
)
syscall_test(
@@ -716,6 +731,7 @@ syscall_test(
# Takes too long for TSAN. Creates a lot of TCP sockets.
tags = ["nogotsan"],
test = "//test/syscalls/linux:socket_inet_loopback_nogotsan_test",
+ vfs2 = "True",
)
syscall_test(
@@ -791,6 +807,7 @@ syscall_test(
syscall_test(
test = "//test/syscalls/linux:socket_blocking_local_test",
+ vfs2 = "True",
)
syscall_test(
@@ -800,6 +817,7 @@ syscall_test(
syscall_test(
test = "//test/syscalls/linux:socket_non_stream_blocking_local_test",
+ vfs2 = "True",
)
syscall_test(
@@ -810,6 +828,7 @@ syscall_test(
syscall_test(
size = "large",
test = "//test/syscalls/linux:socket_stream_blocking_local_test",
+ vfs2 = "True",
)
syscall_test(
@@ -821,11 +840,13 @@ syscall_test(
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_stream_local_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_stream_nonblock_local_test",
+ vfs2 = "True",
)
syscall_test(
@@ -833,6 +854,7 @@ syscall_test(
size = "enormous",
shard_count = 5,
test = "//test/syscalls/linux:socket_unix_dgram_local_test",
+ vfs2 = "True",
)
syscall_test(
@@ -854,11 +876,13 @@ syscall_test(
size = "enormous",
shard_count = 5,
test = "//test/syscalls/linux:socket_unix_seqpacket_local_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_unix_stream_test",
+ vfs2 = "True",
)
syscall_test(
@@ -876,6 +900,7 @@ syscall_test(
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_unix_unbound_filesystem_test",
+ vfs2 = "True",
)
syscall_test(
@@ -889,6 +914,7 @@ syscall_test(
size = "large",
shard_count = 50,
test = "//test/syscalls/linux:socket_unix_unbound_stream_test",
+ vfs2 = "True",
)
syscall_test(
@@ -900,6 +926,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:stat_test",
+ vfs2 = "True",
)
syscall_test(
@@ -916,6 +943,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:symlink_test",
+ vfs2 = "True",
)
syscall_test(
@@ -1066,6 +1094,7 @@ syscall_test(
syscall_test(
test = "//test/syscalls/linux:proc_net_unix_test",
+ vfs2 = "True",
)
syscall_test(
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 96044928e..078e4a284 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -791,6 +791,7 @@ cc_binary(
deps = [
":socket_test_util",
"//test/util:cleanup",
+ "//test/util:epoll_util",
"//test/util:eventfd_util",
"//test/util:fs_util",
"@com_google_absl//absl/base:core_headers",
diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc
index 35e8a4ff3..25bef2522 100644
--- a/test/syscalls/linux/fcntl.cc
+++ b/test/syscalls/linux/fcntl.cc
@@ -191,45 +191,85 @@ TEST(FcntlTest, SetFlags) {
EXPECT_EQ(rflags, expected);
}
-TEST_F(FcntlLockTest, SetLockBadFd) {
+void TestLock(int fd, short lock_type = F_RDLCK) { // NOLINT, type in flock
struct flock fl;
- fl.l_type = F_WRLCK;
+ fl.l_type = lock_type;
fl.l_whence = SEEK_SET;
fl.l_start = 0;
- // len 0 has a special meaning: lock all bytes despite how
- // large the file grows.
+  // len 0 locks all bytes no matter how large the file grows.
fl.l_len = 0;
- EXPECT_THAT(fcntl(-1, F_SETLK, &fl), SyscallFailsWithErrno(EBADF));
+ EXPECT_THAT(fcntl(fd, F_SETLK, &fl), SyscallSucceeds());
}
-TEST_F(FcntlLockTest, SetLockPipe) {
- int fds[2];
- ASSERT_THAT(pipe(fds), SyscallSucceeds());
-
+void TestLockBadFD(int fd,
+ short lock_type = F_RDLCK) { // NOLINT, type in flock
struct flock fl;
- fl.l_type = F_WRLCK;
+ fl.l_type = lock_type;
fl.l_whence = SEEK_SET;
fl.l_start = 0;
- // Same as SetLockBadFd, but doesn't matter, we expect this to fail.
+  // len 0 locks all bytes no matter how large the file grows.
fl.l_len = 0;
- EXPECT_THAT(fcntl(fds[0], F_SETLK, &fl), SyscallFailsWithErrno(EBADF));
- EXPECT_THAT(close(fds[0]), SyscallSucceeds());
- EXPECT_THAT(close(fds[1]), SyscallSucceeds());
+ EXPECT_THAT(fcntl(fd, F_SETLK, &fl), SyscallFailsWithErrno(EBADF));
}
+TEST_F(FcntlLockTest, SetLockBadFd) { TestLockBadFD(-1); }
+
TEST_F(FcntlLockTest, SetLockDir) {
auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
- FileDescriptor fd =
- ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0666));
+ auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0000));
+ TestLock(fd.get());
+}
- struct flock fl;
- fl.l_type = F_RDLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 0;
- // Same as SetLockBadFd.
- fl.l_len = 0;
+TEST_F(FcntlLockTest, SetLockSymlink) {
+ // TODO(gvisor.dev/issue/2782): Replace with IsRunningWithVFS1() when O_PATH
+ // is supported.
+ SKIP_IF(IsRunningOnGvisor());
- EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ auto symlink = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), file.path()));
+
+ auto fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(symlink.path(), O_RDONLY | O_PATH, 0000));
+ TestLockBadFD(fd.get());
+}
+
+TEST_F(FcntlLockTest, SetLockProc) {
+ auto fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/status", O_RDONLY, 0000));
+ TestLock(fd.get());
+}
+
+TEST_F(FcntlLockTest, SetLockPipe) {
+ SKIP_IF(IsRunningWithVFS1());
+
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ TestLock(fds[0]);
+ TestLockBadFD(fds[0], F_WRLCK);
+
+ TestLock(fds[1], F_WRLCK);
+ TestLockBadFD(fds[1]);
+
+ EXPECT_THAT(close(fds[0]), SyscallSucceeds());
+ EXPECT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(FcntlLockTest, SetLockSocket) {
+ SKIP_IF(IsRunningWithVFS1());
+
+ int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_THAT(sock, SyscallSucceeds());
+
+ struct sockaddr_un addr =
+ ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(true /* abstract */, AF_UNIX));
+ ASSERT_THAT(
+ bind(sock, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+ SyscallSucceeds());
+
+ TestLock(sock);
+ EXPECT_THAT(close(sock), SyscallSucceeds());
}
TEST_F(FcntlLockTest, SetLockBadOpenFlagsWrite) {
@@ -241,8 +281,7 @@ TEST_F(FcntlLockTest, SetLockBadOpenFlagsWrite) {
fl0.l_type = F_WRLCK;
fl0.l_whence = SEEK_SET;
fl0.l_start = 0;
- // Same as SetLockBadFd.
- fl0.l_len = 0;
+  fl0.l_len = 0;  // Lock the whole file.
// Expect that setting a write lock using a read only file descriptor
// won't work.
@@ -704,7 +743,7 @@ TEST_F(FcntlLockTest, SetWriteLockThenBlockingWriteLock) {
<< "Exited with code: " << status;
}
-// This test will veirfy that blocking works as expected when another process
+// This test will verify that blocking works as expected when another process
// holds a read lock when obtaining a write lock. This test will hold the lock
// for some amount of time and then wait for the second process to send over the
// socket_fd the amount of time it was blocked for before the lock succeeded.
@@ -1109,8 +1148,7 @@ int main(int argc, char** argv) {
fl.l_start = absl::GetFlag(FLAGS_child_setlock_start);
fl.l_len = absl::GetFlag(FLAGS_child_setlock_len);
- // Test the fcntl, no need to log, the error is unambiguously
- // from fcntl at this point.
+ // Test the fcntl.
int err = 0;
int ret = 0;
@@ -1123,6 +1161,8 @@ int main(int argc, char** argv) {
if (ret == -1 && errno != 0) {
err = errno;
+ std::cerr << "CHILD lock " << setlock_on << " failed " << err
+ << std::endl;
}
// If there is a socket fd let's send back the time in microseconds it took
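For readers unfamiliar with the flock struct exercised by the TestLock/TestLockBadFD helpers above, here is a minimal standalone sketch of the whole-file F_SETLK pattern. It is illustrative only, not part of the patch, and the demo path is hypothetical.

#include <fcntl.h>
#include <unistd.h>

#include <cstdio>

int main() {
  int fd = open("/tmp/flock_demo", O_RDWR | O_CREAT, 0644);  // hypothetical path
  if (fd < 0) {
    perror("open");
    return 1;
  }
  struct flock fl = {};
  fl.l_type = F_WRLCK;     // Exclusive lock; F_RDLCK would request a shared lock.
  fl.l_whence = SEEK_SET;
  fl.l_start = 0;
  fl.l_len = 0;            // 0 means "to EOF", covering bytes appended later.
  if (fcntl(fd, F_SETLK, &fl) == -1) {
    perror("fcntl(F_SETLK)");  // Fails if another process holds a conflicting lock.
  }
  close(fd);  // Closing the descriptor releases the advisory lock.
  return 0;
}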
diff --git a/test/syscalls/linux/link.cc b/test/syscalls/linux/link.cc
index e74fa2ed5..544681168 100644
--- a/test/syscalls/linux/link.cc
+++ b/test/syscalls/linux/link.cc
@@ -79,8 +79,13 @@ TEST(LinkTest, PermissionDenied) {
// Make the file "unsafe" to link by making it only readable, but not
// writable.
- const auto oldfile =
+ const auto unwriteable_file =
ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0400));
+ const std::string special_path = NewTempAbsPath();
+ ASSERT_THAT(mkfifo(special_path.c_str(), 0666), SyscallSucceeds());
+ const auto setuid_file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666 | S_ISUID));
+
const std::string newname = NewTempAbsPath();
// Do setuid in a separate thread so that after finishing this test, the
@@ -97,8 +102,14 @@ TEST(LinkTest, PermissionDenied) {
EXPECT_THAT(syscall(SYS_setuid, absl::GetFlag(FLAGS_scratch_uid)),
SyscallSucceeds());
- EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()),
+ EXPECT_THAT(link(unwriteable_file.path().c_str(), newname.c_str()),
+ SyscallFailsWithErrno(EPERM));
+ EXPECT_THAT(link(special_path.c_str(), newname.c_str()),
SyscallFailsWithErrno(EPERM));
+ if (!IsRunningWithVFS1()) {
+ EXPECT_THAT(link(setuid_file.path().c_str(), newname.c_str()),
+ SyscallFailsWithErrno(EPERM));
+ }
});
}
diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc
index 670c0284b..bb7d108e8 100644
--- a/test/syscalls/linux/open.cc
+++ b/test/syscalls/linux/open.cc
@@ -439,6 +439,12 @@ TEST_F(OpenTest, CanTruncateWithStrangePermissions) {
EXPECT_THAT(close(fd), SyscallSucceeds());
}
+TEST_F(OpenTest, OpenNonDirectoryWithTrailingSlash) {
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const std::string bad_path = file.path() + "/";
+ EXPECT_THAT(open(bad_path.c_str(), O_RDONLY), SyscallFailsWithErrno(ENOTDIR));
+}
+
} // namespace
} // namespace testing
diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc
index 63642880a..923699ed3 100644
--- a/test/syscalls/linux/proc.cc
+++ b/test/syscalls/linux/proc.cc
@@ -1921,7 +1921,7 @@ TEST(ProcSelfMountinfo, RequiredFieldsArePresent) {
AllOf(
// Root mount.
ContainsRegex(
- R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"),
+ R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ /\S* / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"),
// Proc mount - always rw.
ContainsRegex(
R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / /proc rw.*- \S+ \S+ rw\S*)")));
@@ -1967,6 +1967,17 @@ void CheckDuplicatesRecursively(std::string path) {
errno = 0;
struct dirent* dp = readdir(dir);
if (dp == nullptr) {
+ // Linux will return EINVAL when calling getdents on a /proc/tid/net
+ // file corresponding to a zombie task.
+ // See fs/proc/proc_net.c:proc_tgid_net_readdir().
+ //
+ // We just ignore the directory in this case.
+ if (errno == EINVAL && absl::StartsWith(path, "/proc/") &&
+ absl::EndsWith(path, "/net")) {
+ break;
+ }
+
+ // Otherwise, no errors are allowed.
ASSERT_EQ(errno, 0) << path;
break; // We're done.
}
diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc
index 02a326fc3..3377b65cf 100644
--- a/test/syscalls/linux/proc_net.cc
+++ b/test/syscalls/linux/proc_net.cc
@@ -441,6 +441,11 @@ TEST(ProcNetSnmp, CheckNetStat) {
EXPECT_EQ(value_count, 4);
}
+TEST(ProcNetSnmp, Stat) {
+ struct stat st = {};
+ ASSERT_THAT(stat("/proc/net/snmp", &st), SyscallSucceeds());
+}
+
TEST(ProcNetSnmp, CheckSnmp) {
// TODO(b/155123175): SNMP and netstat don't work on gVisor.
SKIP_IF(IsRunningOnGvisor());
diff --git a/test/syscalls/linux/raw_socket_ipv4.cc b/test/syscalls/linux/raw_socket_ipv4.cc
index cde2f07c9..0116c3e94 100644
--- a/test/syscalls/linux/raw_socket_ipv4.cc
+++ b/test/syscalls/linux/raw_socket_ipv4.cc
@@ -357,10 +357,389 @@ TEST_P(RawSocketTest, BindConnectSendAndReceive) {
EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), kBuf, sizeof(kBuf)), 0);
}
+// Check that setting SO_RCVBUF below min is clamped to the minimum
+// receive buffer size.
+TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ // Discover minimum receive buf size by trying to set it to zero.
+ // See:
+ // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ int min = 0;
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value so let's use a value that when doubled will still
+ // be smaller than min.
+ int below_min = min / 2 - 1;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_RCVBUF above max is clamped to the maximum
+// receive buffer size.
+TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ // Discover max buf size by trying to set the largest possible buffer size.
+ constexpr int kRcvBufSz = 0xffffffff;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ int max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+ SyscallSucceeds());
+
+ int above_max = max + 1;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored.
+TEST_P(RawSocketTest, SetSocketRecvBuf) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ int max = 0;
+ int min = 0;
+ {
+ // Discover max buf size by trying to set a really large buffer size.
+ constexpr int kRcvBufSz = 0xffffffff;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+ SyscallSucceeds());
+ }
+
+ {
+ // Discover minimum buffer size by trying to set a zero size receive buffer
+ // size.
+ // See:
+ // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ int quarter_sz = min + (max - min) / 4;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+ // TODO(gvisor.dev/issue/2926): Remove when Netstack matches linux behavior.
+ if (!IsRunningOnGvisor()) {
+ quarter_sz *= 2;
+ }
+ ASSERT_EQ(quarter_sz, val);
+}
+
+// Check that setting SO_SNDBUF below min is clamped to the minimum
+// send buffer size.
+TEST_P(RawSocketTest, SetSocketSendBufBelowMin) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ // Discover minimum buffer size by trying to set it to zero.
+ constexpr int kSndBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ int min = 0;
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value so let's use a value that when doubled will still
+ // be smaller than min.
+ int below_min = min / 2 - 1;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_SNDBUF above max is clamped to the maximum
+// send buffer size.
+TEST_P(RawSocketTest, SetSocketSendBufAboveMax) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ // Discover maximum buffer size by trying to set it to a large value.
+ constexpr int kSndBufSz = 0xffffffff;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ int max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+ SyscallSucceeds());
+
+ int above_max = max + 1;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
+TEST_P(RawSocketTest, SetSocketSendBuf) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ int max = 0;
+ int min = 0;
+ {
+ // Discover maximum buffer size by trying to set it to a large value.
+ constexpr int kSndBufSz = 0xffffffff;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+ SyscallSucceeds());
+ }
+
+ {
+ // Discover minimum buffer size by trying to set it to zero.
+ constexpr int kSndBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ int quarter_sz = min + (max - min) / 4;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+ // TODO(gvisor.dev/issue/2926): Remove the gvisor special casing when Netstack
+ // matches linux behavior.
+ if (!IsRunningOnGvisor()) {
+ quarter_sz *= 2;
+ }
+
+ ASSERT_EQ(quarter_sz, val);
+}
+
void RawSocketTest::SendBuf(const char* buf, int buf_len) {
ASSERT_NO_FATAL_FAILURE(SendBufTo(s_, addr_, buf, buf_len));
}
+// Test that receive buffer limits are not enforced when the recv buffer is
+// empty.
+TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ ASSERT_THAT(
+ bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+ SyscallSucceeds());
+ ASSERT_THAT(
+ connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+ SyscallSucceeds());
+
+ int min = 0;
+ {
+ // Discover minimum buffer size by trying to set it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ {
+ // Send data of size min and verify that it's received.
+ std::vector<char> buf(min);
+ RandomizeBuffer(buf.data(), buf.size());
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+
+ // Receive the packet and make sure it's identical.
+ std::vector<char> recv_buf(buf.size() + sizeof(struct iphdr));
+ ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+ EXPECT_EQ(
+ memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(), buf.size()),
+ 0);
+ }
+
+ {
+    // Send data of size min + 1 and verify that it's received. Both linux and
+ // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer
+ // is currently empty.
+ std::vector<char> buf(min + 1);
+ RandomizeBuffer(buf.data(), buf.size());
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ // Receive the packet and make sure it's identical.
+ std::vector<char> recv_buf(buf.size() + sizeof(struct iphdr));
+ ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+ EXPECT_EQ(
+ memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(), buf.size()),
+ 0);
+ }
+}
+
+TEST_P(RawSocketTest, RecvBufLimits) {
+  // The TCP stack generates RSTs for unknown endpoints, which complicates the
+  // test as we have to deal with the RST packets as well. For testing the raw
+  // socket endpoint's buffer limit enforcement we can just test with UDP.
+ //
+ // We don't use SKIP_IF here because root_test_runner explicitly fails if a
+ // test is skipped.
+ if (Protocol() == IPPROTO_TCP) {
+ return;
+ }
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ ASSERT_THAT(
+ bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+ SyscallSucceeds());
+ ASSERT_THAT(
+ connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+ SyscallSucceeds());
+
+ int min = 0;
+ {
+ // Discover minimum buffer size by trying to set it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+  // Now set the limit to min * 4.
+ int new_rcv_buf_sz = min * 4;
+ if (!IsRunningOnGvisor()) {
+    // Linux doubles the value specified so just set to min * 2.
+ new_rcv_buf_sz = min * 2;
+ }
+
+ ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz,
+ sizeof(new_rcv_buf_sz)),
+ SyscallSucceeds());
+ int rcv_buf_sz = 0;
+ {
+ socklen_t rcv_buf_len = sizeof(rcv_buf_sz);
+ ASSERT_THAT(
+ getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len),
+ SyscallSucceeds());
+ }
+
+ // Set a receive timeout so that we don't block forever on reads if the test
+ // fails.
+ struct timeval tv {
+ .tv_sec = 1, .tv_usec = 0,
+ };
+ ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+ SyscallSucceeds());
+
+ {
+ std::vector<char> buf(min);
+ RandomizeBuffer(buf.data(), buf.size());
+
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ int sent = 4;
+ if (IsRunningOnGvisor()) {
+ // Linux seems to drop the 4th packet even though technically it should
+ // fit in the receive buffer.
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ sent++;
+ }
+
+ // Verify that the expected number of packets are available to be read.
+ for (int i = 0; i < sent - 1; i++) {
+ // Receive the packet and make sure it's identical.
+ std::vector<char> recv_buf(buf.size() + sizeof(struct iphdr));
+ ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+ EXPECT_EQ(memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(),
+ buf.size()),
+ 0);
+ }
+
+ // Assert that the last packet is dropped because the receive buffer should
+ // be full after the first four packets.
+ std::vector<char> recv_buf(buf.size() + sizeof(struct iphdr));
+ struct iovec iov = {};
+ iov.iov_base = static_cast<void*>(const_cast<char*>(recv_buf.data()));
+ iov.iov_len = buf.size();
+ struct msghdr msg = {};
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+ ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT),
+ SyscallFailsWithErrno(EAGAIN));
+ }
+}
+
void RawSocketTest::SendBufTo(int sock, const struct sockaddr_in& addr,
const char* buf, int buf_len) {
// It's safe to use const_cast here because sendmsg won't modify the iovec or
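The new buffer-size tests above all rely on the same trick: setting SO_RCVBUF or SO_SNDBUF to 0 clamps the request to the kernel minimum, and setting it to a very large value clamps it to the maximum, so a set followed by a get reveals the effective bound (with Linux reporting back twice the applied value). A hypothetical helper capturing that pattern, shown only as an illustration and not part of the patch:

#include <sys/socket.h>

// Requests `requested` bytes for `opt` (SO_RCVBUF or SO_SNDBUF) on `sock` and
// returns the value the kernel reports back, or -1 on error. Passing 0 reveals
// the minimum; passing a huge value reveals the maximum. On Linux the reported
// value is double what the kernel actually applied.
int DiscoverBufferBound(int sock, int opt, int requested) {
  if (setsockopt(sock, SOL_SOCKET, opt, &requested, sizeof(requested)) != 0) {
    return -1;
  }
  int val = 0;
  socklen_t len = sizeof(val);
  if (getsockopt(sock, SOL_SOCKET, opt, &val, &len) != 0) {
    return -1;
  }
  return val;
}

Usage would be, for example, DiscoverBufferBound(fd, SO_RCVBUF, 0) for the minimum and DiscoverBufferBound(fd, SO_RCVBUF, 0x7fffffff) for the maximum.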
diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc
index 5536ab53a..796598eee 100644
--- a/test/syscalls/linux/socket_ip_unbound.cc
+++ b/test/syscalls/linux/socket_ip_unbound.cc
@@ -40,7 +40,7 @@ TEST_P(IPUnboundSocketTest, TtlDefault) {
socklen_t get_sz = sizeof(get);
EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get, &get_sz),
SyscallSucceedsWithValue(0));
- EXPECT_EQ(get, 64);
+ EXPECT_TRUE(get == 64 || get == 127);
EXPECT_EQ(get_sz, sizeof(get));
}
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc
index 4db9553e1..de0f5f01b 100644
--- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc
@@ -18,6 +18,7 @@
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
+#include <sys/types.h>
#include <sys/un.h>
#include <cstdio>
@@ -2236,5 +2237,220 @@ TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) {
EXPECT_EQ(received_pktinfo.ipi_spec_dst.s_addr, htonl(INADDR_LOOPBACK));
EXPECT_EQ(received_pktinfo.ipi_addr.s_addr, htonl(INADDR_LOOPBACK));
}
+
+// Check that setting SO_RCVBUF below min is clamped to the minimum
+// receive buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBufBelowMin) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ // Discover minimum buffer size by setting it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+ sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ int min = 0;
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value so let's use a value that when doubled will still
+ // be smaller than min.
+ int below_min = min / 2 - 1;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &below_min,
+ sizeof(below_min)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_RCVBUF above max is clamped to the maximum
+// receive buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBufAboveMax) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Discover maximum buffer size by setting it to a really large value.
+ constexpr int kRcvBufSz = 0xffffffff;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+ sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ int max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+ SyscallSucceeds());
+
+ int above_max = max + 1;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &above_max,
+ sizeof(above_max)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_RCVBUF min <= rcvBufSz <= max is honored.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBuf) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ int max = 0;
+ int min = 0;
+ {
+    // Discover maximum buffer size by setting it to a really large value.
+ constexpr int kRcvBufSz = 0xffffffff;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+ sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+ SyscallSucceeds());
+ }
+
+ {
+ // Discover minimum buffer size by setting it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+ sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ int quarter_sz = min + (max - min) / 4;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &quarter_sz,
+ sizeof(quarter_sz)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+ if (!IsRunningOnGvisor()) {
+ quarter_sz *= 2;
+ }
+ ASSERT_EQ(quarter_sz, val);
+}
+
+// Check that setting SO_SNDBUF below min is clamped to the minimum
+// send buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBufBelowMin) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ // Discover minimum buffer size by setting it to zero.
+ constexpr int kSndBufSz = 0;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+ sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ int min = 0;
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value so let's use a value that when doubled will still
+ // be smaller than min.
+ int below_min = min / 2 - 1;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &below_min,
+ sizeof(below_min)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_SNDBUF above max is clamped to the maximum
+// send buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBufAboveMax) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Discover maximum buffer size by setting it to a really large value.
+ constexpr int kSndBufSz = 0xffffffff;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+ sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ int max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+ SyscallSucceeds());
+
+ int above_max = max + 1;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &above_max,
+ sizeof(above_max)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBuf) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ int max = 0;
+ int min = 0;
+ {
+    // Discover maximum buffer size by setting it to a really large value.
+ constexpr int kSndBufSz = 0xffffffff;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+ sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+ SyscallSucceeds());
+ }
+
+ {
+ // Discover minimum buffer size by setting it to zero.
+ constexpr int kSndBufSz = 0;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+ sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ int quarter_sz = min + (max - min) / 4;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &quarter_sz,
+ sizeof(quarter_sz)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+ if (!IsRunningOnGvisor()) {
+ quarter_sz *= 2;
+ }
+
+ ASSERT_EQ(quarter_sz, val);
+}
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc
index 42521efef..cc1db3de8 100644
--- a/test/syscalls/linux/udp_socket_test_cases.cc
+++ b/test/syscalls/linux/udp_socket_test_cases.cc
@@ -1543,5 +1543,120 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) {
memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos));
EXPECT_EQ(received_tos, sent_tos);
}
+
+TEST_P(UdpSocketTest, RecvBufLimitsEmptyRcvBuf) {
+ // Discover minimum buffer size by setting it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ int min = 0;
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+
+ // Bind s_ to loopback.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ {
+ // Send data of size min and verify that it's received.
+ std::vector<char> buf(min);
+ RandomizeBuffer(buf.data(), buf.size());
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ std::vector<char> received(buf.size());
+ EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT),
+ SyscallSucceedsWithValue(received.size()));
+ }
+
+ {
+    // Send data of size min + 1 and verify that it's received. Both linux and
+ // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer
+ // is currently empty.
+ std::vector<char> buf(min + 1);
+ RandomizeBuffer(buf.data(), buf.size());
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+
+ std::vector<char> received(buf.size());
+ EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT),
+ SyscallSucceedsWithValue(received.size()));
+ }
+}
+
+// Test that receive buffer limits are enforced.
+TEST_P(UdpSocketTest, RecvBufLimits) {
+ // Bind s_ to loopback.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ int min = 0;
+ {
+ // Discover minimum buffer size by trying to set it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ // Now set the limit to min * 4.
+ int new_rcv_buf_sz = min * 4;
+ if (!IsRunningOnGvisor() || IsRunningWithHostinet()) {
+ // Linux doubles the value specified so just set to min * 2.
+ new_rcv_buf_sz = min * 2;
+ }
+
+ ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz,
+ sizeof(new_rcv_buf_sz)),
+ SyscallSucceeds());
+ int rcv_buf_sz = 0;
+ {
+ socklen_t rcv_buf_len = sizeof(rcv_buf_sz);
+ ASSERT_THAT(
+ getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len),
+ SyscallSucceeds());
+ }
+
+ {
+ std::vector<char> buf(min);
+ RandomizeBuffer(buf.data(), buf.size());
+
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ int sent = 4;
+ if (IsRunningOnGvisor() && !IsRunningWithHostinet()) {
+ // Linux seems to drop the 4th packet even though technically it should
+ // fit in the receive buffer.
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ sent++;
+ }
+
+ for (int i = 0; i < sent - 1; i++) {
+ // Receive the data.
+ std::vector<char> received(buf.size());
+ EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT),
+ SyscallSucceedsWithValue(received.size()));
+ EXPECT_EQ(memcmp(buf.data(), received.data(), buf.size()), 0);
+ }
+
+ // The last receive should fail with EAGAIN as the last packet should have
+ // been dropped due to lack of space in the receive buffer.
+ std::vector<char> received(buf.size());
+ EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EAGAIN));
+ }
+}
+
} // namespace testing
} // namespace gvisor
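The UDP receive-buffer tests above distinguish a dropped datagram from a delivered one by issuing non-blocking reads: once the queue is empty, recv() with MSG_DONTWAIT fails with EAGAIN/EWOULDBLOCK. A small illustrative sketch of that drain pattern (the helper name is hypothetical, not part of the patch):

#include <errno.h>
#include <stddef.h>
#include <sys/socket.h>
#include <sys/types.h>

// Reads queued datagrams from `sock` without blocking and returns how many
// were received; -1 signals an unexpected error. An empty queue surfaces as
// EAGAIN/EWOULDBLOCK, which is how the tests detect a dropped packet.
int DrainDatagrams(int sock, char* buf, size_t buf_len) {
  int count = 0;
  while (true) {
    ssize_t n = recv(sock, buf, buf_len, MSG_DONTWAIT);
    if (n < 0) {
      return (errno == EAGAIN || errno == EWOULDBLOCK) ? count : -1;
    }
    ++count;
  }
}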
diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc
index 3231732ec..cbcf08451 100644
--- a/test/syscalls/linux/xattr.cc
+++ b/test/syscalls/linux/xattr.cc
@@ -73,9 +73,10 @@ TEST_F(XattrTest, XattrLargeName) {
std::string name = "user.";
name += std::string(XATTR_NAME_MAX - name.length(), 'a');
- // An xattr should be whitelisted before it can be accessed--do not allow
- // arbitrary xattrs to be read/written in gVisor.
if (!IsRunningOnGvisor()) {
+ // In gVisor, access to xattrs is controlled with an explicit list of
+ // allowed names. This name isn't going to be configured to allow access, so
+ // don't test it.
EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0),
SyscallSucceeds());
EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0),
diff --git a/test/util/test_util.h b/test/util/test_util.h
index 8e3245b27..e635827e6 100644
--- a/test/util/test_util.h
+++ b/test/util/test_util.h
@@ -220,6 +220,7 @@ constexpr char kKVM[] = "kvm";
bool IsRunningOnGvisor();
const std::string GvisorPlatform();
bool IsRunningWithHostinet();
+// TODO(gvisor.dev/issue/1624): Delete once VFS1 is gone.
bool IsRunningWithVFS1();
#ifdef __linux__
diff --git a/tools/nogo/matchers.go b/tools/nogo/matchers.go
index bc5772303..57a250501 100644
--- a/tools/nogo/matchers.go
+++ b/tools/nogo/matchers.go
@@ -27,10 +27,15 @@ type matcher interface {
ShouldReport(d analysis.Diagnostic, fs *token.FileSet) bool
}
-// pathRegexps excludes explicit paths.
+// pathRegexps filters explicit paths.
type pathRegexps struct {
- expr []*regexp.Regexp
- whitelist bool
+ expr []*regexp.Regexp
+
+ // include, if true, indicates that paths matching any regexp in expr
+ // match.
+ //
+ // If false, paths matching no regexps in expr match.
+ include bool
}
// buildRegexps builds a list of regular expressions.
@@ -49,33 +54,33 @@ func (p *pathRegexps) ShouldReport(d analysis.Diagnostic, fs *token.FileSet) boo
fullPos := fs.Position(d.Pos).String()
for _, path := range p.expr {
if path.MatchString(fullPos) {
- return p.whitelist
+ return p.include
}
}
- return !p.whitelist
+ return !p.include
}
// internalExcluded excludes specific internal paths.
func internalExcluded(paths ...string) *pathRegexps {
return &pathRegexps{
- expr: buildRegexps(internalPrefix, paths...),
- whitelist: false,
+ expr: buildRegexps(internalPrefix, paths...),
+ include: false,
}
}
// excludedExcluded excludes specific external paths.
func externalExcluded(paths ...string) *pathRegexps {
return &pathRegexps{
- expr: buildRegexps(externalPrefix, paths...),
- whitelist: false,
+ expr: buildRegexps(externalPrefix, paths...),
+ include: false,
}
}
// internalMatches returns a path matcher for internal packages.
func internalMatches() *pathRegexps {
return &pathRegexps{
- expr: buildRegexps(internalPrefix, ".*"),
- whitelist: true,
+ expr: buildRegexps(internalPrefix, ".*"),
+ include: true,
}
}
@@ -89,7 +94,7 @@ func (r resultExcluded) ShouldReport(d analysis.Diagnostic, _ *token.FileSet) bo
return false
}
}
- return true // Not blacklisted.
+ return true // Not excluded.
}
// andMatcher is a composite matcher.