summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/go.yml15
-rw-r--r--.travis.yml5
-rw-r--r--Makefile51
-rw-r--r--README.md1
-rwxr-xr-xbenchmarks/tcp/tcp_benchmark.sh6
-rw-r--r--benchmarks/tcp/tcp_proxy.go6
-rw-r--r--images/jekyll/Dockerfile3
-rw-r--r--pkg/abi/linux/ip.go10
-rw-r--r--pkg/cleanup/BUILD17
-rw-r--r--pkg/cleanup/cleanup.go60
-rw-r--r--pkg/cleanup/cleanup_test.go66
-rw-r--r--pkg/procid/procid_amd64.s2
-rw-r--r--pkg/procid/procid_arm64.s2
-rw-r--r--pkg/sentry/control/BUILD1
-rw-r--r--pkg/sentry/control/proc.go68
-rw-r--r--pkg/sentry/fs/file.go9
-rw-r--r--pkg/sentry/fs/fs.go3
-rw-r--r--pkg/sentry/fs/g3doc/fuse.md13
-rw-r--r--pkg/sentry/fs/mounts.go72
-rw-r--r--pkg/sentry/fs/user/BUILD7
-rw-r--r--pkg/sentry/fs/user/path.go169
-rw-r--r--pkg/sentry/fs/user/user.go2
-rw-r--r--pkg/sentry/fsimpl/ext/dentry.go12
-rw-r--r--pkg/sentry/fsimpl/gofer/BUILD3
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go65
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go12
-rw-r--r--pkg/sentry/fsimpl/gofer/host_named_pipe.go97
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go77
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs.go14
-rw-r--r--pkg/sentry/fsimpl/tmpfs/BUILD1
-rw-r--r--pkg/sentry/fsimpl/tmpfs/directory.go2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go44
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go79
-rw-r--r--pkg/sentry/kernel/fd_table.go8
-rw-r--r--pkg/sentry/platform/kvm/bluepill_unsafe.go2
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64.go6
-rw-r--r--pkg/sentry/platform/kvm/machine_unsafe.go2
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_unsafe.go2
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go57
-rw-r--r--pkg/sentry/socket/netfilter/owner_matcher.go2
-rw-r--r--pkg/sentry/socket/netfilter/tcp_matcher.go2
-rw-r--r--pkg/sentry/socket/netfilter/udp_matcher.go2
-rw-r--r--pkg/sentry/socket/netstack/stack.go9
-rw-r--r--pkg/sentry/socket/unix/transport/connectioned.go2
-rw-r--r--pkg/sentry/socket/unix/unix.go13
-rw-r--r--pkg/sentry/syscalls/linux/sys_splice.go6
-rw-r--r--pkg/sentry/syscalls/linux/sys_xattr.go12
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/BUILD1
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/inotify.go134
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/read_write.go36
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/splice.go5
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/vfs2.go8
-rw-r--r--pkg/sentry/vfs/BUILD15
-rw-r--r--pkg/sentry/vfs/README.md4
-rw-r--r--pkg/sentry/vfs/anonfs.go12
-rw-r--r--pkg/sentry/vfs/dentry.go27
-rw-r--r--pkg/sentry/vfs/inotify.go697
-rw-r--r--pkg/sentry/vfs/mount.go70
-rw-r--r--pkg/sentry/vfs/mount_unsafe.go2
-rw-r--r--pkg/sentry/vfs/vfs.go5
-rw-r--r--pkg/sentry/watchdog/watchdog.go12
-rw-r--r--pkg/sleep/sleep_unsafe.go2
-rw-r--r--pkg/sync/memmove_unsafe.go2
-rw-r--r--pkg/sync/mutex_unsafe.go2
-rw-r--r--pkg/sync/rwmutex_unsafe.go2
-rw-r--r--pkg/syncevent/waiter_unsafe.go2
-rw-r--r--pkg/tcpip/adapters/gonet/gonet.go5
-rw-r--r--pkg/tcpip/link/channel/channel.go10
-rw-r--r--pkg/tcpip/link/fdbased/endpoint.go6
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_test.go10
-rw-r--r--pkg/tcpip/link/fdbased/mmap.go2
-rw-r--r--pkg/tcpip/link/fdbased/packet_dispatchers.go10
-rw-r--r--pkg/tcpip/link/loopback/loopback.go6
-rw-r--r--pkg/tcpip/link/muxed/injectable.go6
-rw-r--r--pkg/tcpip/link/muxed/injectable_test.go4
-rw-r--r--pkg/tcpip/link/qdisc/fifo/endpoint.go8
-rw-r--r--pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go2
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem.go4
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem_test.go26
-rw-r--r--pkg/tcpip/link/sniffer/sniffer.go10
-rw-r--r--pkg/tcpip/link/tun/device.go2
-rw-r--r--pkg/tcpip/link/waitable/waitable.go6
-rw-r--r--pkg/tcpip/link/waitable/waitable_test.go16
-rw-r--r--pkg/tcpip/network/arp/arp.go10
-rw-r--r--pkg/tcpip/network/arp/arp_test.go2
-rw-r--r--pkg/tcpip/network/ip_test.go30
-rw-r--r--pkg/tcpip/network/ipv4/icmp.go12
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go34
-rw-r--r--pkg/tcpip/network/ipv4/ipv4_test.go28
-rw-r--r--pkg/tcpip/network/ipv6/icmp.go10
-rw-r--r--pkg/tcpip/network/ipv6/icmp_test.go14
-rw-r--r--pkg/tcpip/network/ipv6/ipv6.go8
-rw-r--r--pkg/tcpip/network/ipv6/ipv6_test.go8
-rw-r--r--pkg/tcpip/network/ipv6/ndp_test.go10
-rw-r--r--pkg/tcpip/stack/conntrack.go4
-rw-r--r--pkg/tcpip/stack/forwarder.go4
-rw-r--r--pkg/tcpip/stack/forwarder_test.go36
-rw-r--r--pkg/tcpip/stack/iptables.go91
-rw-r--r--pkg/tcpip/stack/iptables_types.go79
-rw-r--r--pkg/tcpip/stack/ndp.go4
-rw-r--r--pkg/tcpip/stack/ndp_test.go14
-rw-r--r--pkg/tcpip/stack/nic.go19
-rw-r--r--pkg/tcpip/stack/nic_test.go2
-rw-r--r--pkg/tcpip/stack/packet_buffer.go33
-rw-r--r--pkg/tcpip/stack/registration.go45
-rw-r--r--pkg/tcpip/stack/route.go19
-rw-r--r--pkg/tcpip/stack/stack.go27
-rw-r--r--pkg/tcpip/stack/stack_test.go26
-rw-r--r--pkg/tcpip/stack/transport_demuxer.go14
-rw-r--r--pkg/tcpip/stack/transport_demuxer_test.go4
-rw-r--r--pkg/tcpip/stack/transport_test.go22
-rw-r--r--pkg/tcpip/time_unsafe.go2
-rw-r--r--pkg/tcpip/transport/icmp/endpoint.go13
-rw-r--r--pkg/tcpip/transport/icmp/protocol.go2
-rw-r--r--pkg/tcpip/transport/packet/endpoint.go7
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go11
-rw-r--r--pkg/tcpip/transport/tcp/connect.go5
-rw-r--r--pkg/tcpip/transport/tcp/dispatcher.go2
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go9
-rw-r--r--pkg/tcpip/transport/tcp/forwarder.go2
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go4
-rw-r--r--pkg/tcpip/transport/tcp/segment.go2
-rw-r--r--pkg/tcpip/transport/tcp/snd.go50
-rw-r--r--pkg/tcpip/transport/tcp/testing/context/context.go10
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go15
-rw-r--r--pkg/tcpip/transport/udp/forwarder.go4
-rw-r--r--pkg/tcpip/transport/udp/protocol.go6
-rw-r--r--pkg/tcpip/transport/udp/udp_test.go4
-rw-r--r--pkg/tmutex/BUILD17
-rw-r--r--pkg/tmutex/tmutex.go81
-rw-r--r--pkg/tmutex/tmutex_test.go258
-rw-r--r--runsc/boot/filter/config.go8
-rw-r--r--runsc/boot/fs.go51
-rw-r--r--runsc/boot/loader.go2
-rw-r--r--runsc/boot/vfs.go212
-rw-r--r--runsc/cgroup/BUILD2
-rw-r--r--runsc/cgroup/cgroup.go4
-rw-r--r--runsc/cmd/boot.go2
-rw-r--r--runsc/cmd/gofer.go4
-rw-r--r--runsc/cmd/spec.go18
-rw-r--r--runsc/container/BUILD4
-rw-r--r--runsc/container/console_test.go2
-rw-r--r--runsc/container/container.go7
-rw-r--r--runsc/container/container_test.go47
-rw-r--r--runsc/container/multi_container_test.go30
-rw-r--r--runsc/fsgofer/BUILD2
-rw-r--r--runsc/fsgofer/fsgofer.go8
-rw-r--r--runsc/sandbox/BUILD1
-rw-r--r--runsc/sandbox/sandbox.go3
-rw-r--r--runsc/specutils/namespace.go16
-rw-r--r--runsc/specutils/specutils.go44
-rwxr-xr-xscripts/build.sh84
-rwxr-xr-xscripts/release.sh61
-rw-r--r--test/iptables/filter_input.go60
-rw-r--r--test/iptables/iptables_test.go8
-rw-r--r--test/packetimpact/dut/posix_server.cc35
-rw-r--r--test/packetimpact/proto/posix_server.proto13
-rw-r--r--test/packetimpact/runner/packetimpact_test.go24
-rw-r--r--test/packetimpact/testbench/connections.go72
-rw-r--r--test/packetimpact/testbench/dut.go46
-rw-r--r--test/packetimpact/testbench/layers.go5
-rw-r--r--test/packetimpact/tests/BUILD71
-rw-r--r--test/packetimpact/tests/fin_wait2_timeout_test.go20
-rw-r--r--test/packetimpact/tests/icmpv6_param_problem_test.go26
-rw-r--r--test/packetimpact/tests/ipv4_id_uniqueness_test.go111
-rw-r--r--test/packetimpact/tests/tcp_close_wait_ack_test.go36
-rw-r--r--test/packetimpact/tests/tcp_cork_mss_test.go84
-rw-r--r--test/packetimpact/tests/tcp_noaccept_close_rst_test.go12
-rw-r--r--test/packetimpact/tests/tcp_outside_the_window_test.go28
-rw-r--r--test/packetimpact/tests/tcp_paws_mechanism_test.go22
-rw-r--r--test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go87
-rw-r--r--test/packetimpact/tests/tcp_retransmits_test.go22
-rw-r--r--test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go105
-rw-r--r--test/packetimpact/tests/tcp_should_piggyback_test.go64
-rw-r--r--test/packetimpact/tests/tcp_splitseg_mss_test.go71
-rw-r--r--test/packetimpact/tests/tcp_synrcvd_reset_test.go52
-rw-r--r--test/packetimpact/tests/tcp_synsent_reset_test.go88
-rw-r--r--test/packetimpact/tests/tcp_user_timeout_test.go22
-rw-r--r--test/packetimpact/tests/tcp_window_shrink_test.go24
-rw-r--r--test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go33
-rw-r--r--test/packetimpact/tests/tcp_zero_window_probe_test.go38
-rw-r--r--test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go30
-rw-r--r--test/packetimpact/tests/udp_icmp_error_propagation_test.go46
-rw-r--r--test/packetimpact/tests/udp_recv_multicast_test.go12
-rw-r--r--test/packetimpact/tests/udp_send_recv_dgram_test.go16
-rw-r--r--test/root/BUILD1
-rw-r--r--test/root/crictl_test.go27
-rw-r--r--test/root/oom_score_adj_test.go30
-rw-r--r--test/runner/defs.bzl49
-rw-r--r--test/runner/runner.go6
-rw-r--r--test/syscalls/BUILD494
-rw-r--r--test/syscalls/linux/BUILD5
-rw-r--r--test/syscalls/linux/accept_bind.cc42
-rw-r--r--test/syscalls/linux/inotify.cc430
-rw-r--r--test/syscalls/linux/open.cc23
-rw-r--r--test/syscalls/linux/pty.cc6
-rw-r--r--test/syscalls/linux/socket_unix.cc7
-rw-r--r--tools/bazel.mk20
-rwxr-xr-xtools/go_branch.sh6
-rwxr-xr-xtools/make_apt.sh (renamed from tools/make_repository.sh)80
-rwxr-xr-xtools/make_release.sh82
-rwxr-xr-xtools/tag_release.sh19
-rw-r--r--website/_config.yml1
203 files changed, 4824 insertions, 1902 deletions
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 60704f144..10c86f5cd 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -17,7 +17,14 @@ jobs:
-H "Content-Type: application/json" \
-H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
"${{ github.event.pull_request.statuses_url }}"
+ if: github.event_name == 'pull_request'
- uses: actions/checkout@v2
+ if: github.event_name == 'push'
+ with:
+ fetch-depth: 0
+ token: '${{ secrets.GO_TOKEN }}'
+ - uses: actions/checkout@v2
+ if: github.event_name == 'pull_request'
with:
fetch-depth: 0
- uses: actions/setup-go@v2
@@ -41,20 +48,16 @@ jobs:
- run: go build ./...
- if: github.event_name == 'push'
run: |
- # Required dedicated credentials for the Go branch, due to the way
- # branch protection rules are configured.
- git config --global credential.helper cache
- echo -e "protocol=https\nhost=github.com\nusername=${{ secrets.GO_TOKEN }}\npassword=x-oauth-basic" | git credential approve
git remote add upstream "https://github.com/${{ github.repository }}"
git push upstream go:go
- - if: ${{ success() }}
+ - if: ${{ success() && github.event_name == 'pull_request' }}
run: |
jq -nc '{"state": "success", "context": "go tests"}' | \
curl -sL -X POST -d @- \
-H "Content-Type: application/json" \
-H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
"${{ github.event.pull_request.statuses_url }}"
- - if: ${{ failure() }}
+ - if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
jq -nc '{"state": "failure", "context": "go tests"}' | \
curl -sL -X POST -d @- \
diff --git a/.travis.yml b/.travis.yml
index 40c8773fa..fbc0e46d7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,7 +13,10 @@ jobs:
- os: linux
arch: arm64
script:
- - uname -a && make smoke-test
+ # On arm64, we need to create our own pipes for stderr and stdout,
+ # otherwise we will not be able to open /dev/stderr. This is probably
+ # due to AppArmor rules.
+ - uname -a && make smoke-test 2>&1 | cat
branches:
except:
# Skip copybara branches.
diff --git a/Makefile b/Makefile
index 2bcb85e9b..85818ebea 100644
--- a/Makefile
+++ b/Makefile
@@ -16,6 +16,7 @@
# Described below.
OPTIONS :=
+STARTUP_OPTIONS :=
TARGETS := //runsc
ARGS :=
@@ -24,7 +25,7 @@ default: runsc
## usage: make <target>
## or
-## make <build|test|copy|run|sudo> OPTIONS="..." TARGETS="..." ARGS="..."
+## make <build|test|copy|run|sudo> STARTUP_OPTIONS="..." OPTIONS="..." TARGETS="..." ARGS="..."
##
## Basic targets.
##
@@ -33,6 +34,7 @@ default: runsc
## requirements.
##
## There are common arguments that may be passed to targets. These are:
+## STARTUP_OPTIONS - Bazel startup options.
## OPTIONS - Build or test options.
## TARGETS - The bazel targets.
## ARGS - Arguments for run or sudo.
@@ -152,6 +154,52 @@ website-deploy: website-push ## Deploy a new version of the website.
.PHONY: website-push
##
+## Repository builders.
+##
+## This builds a local apt repository. The following variables may be set:
+## RELEASE_ROOT - The repository root (default: "repo" directory).
+## RELEASE_KEY - The repository GPG private key file (default: dummy key is created).
+## RELEASE_NIGHTLY - Set to true if a nightly release (default: false).
+## RELEASE_COMMIT - The commit or Change-Id for the release (needed for tag).
+## RELEASE_NAME - The name of the release in the proper format (needed for tag).
+## RELEASE_NOTES - The file containing release notes (needed for tag).
+##
+RELEASE_ROOT := $(CURDIR)/repo
+RELEASE_KEY := repo.key
+RELEASE_NIGHTLY := false
+RELEASE_COMMIT :=
+RELEASE_NAME :=
+RELEASE_NOTES :=
+
+GPG_TEST_OPTIONS := $(shell if gpg --pinentry-mode loopback --version >/dev/null 2>&1; then echo --pinentry-mode loopback; fi)
+$(RELEASE_KEY):
+ @echo "WARNING: Generating a key for testing ($@); don't use this."
+ T=$$(mktemp /tmp/keyring.XXXXXX); \
+ C=$$(mktemp /tmp/config.XXXXXX); \
+ echo Key-Type: DSA >> $$C && \
+ echo Key-Length: 1024 >> $$C && \
+ echo Name-Real: Test >> $$C && \
+ echo Name-Email: test@example.com >> $$C && \
+ echo Expire-Date: 0 >> $$C && \
+ echo %commit >> $$C && \
+ gpg --batch $(GPG_TEST_OPTIONS) --passphrase '' --no-default-keyring --keyring $$T --no-tty --gen-key $$C && \
+ gpg --batch $(GPG_TEST_OPTIONS) --export-secret-keys --no-default-keyring --keyring $$T --secret-keyring $$T > $@; \
+ rc=$$?; rm -f $$T $$C; exit $$rc
+
+release: $(RELEASE_KEY) ## Builds a release.
+ @mkdir -p $(RELEASE_ROOT)
+ @T=$$(mktemp -d /tmp/release.XXXXXX); \
+ $(MAKE) copy TARGETS="runsc" DESTINATION=$$T && \
+ $(MAKE) copy TARGETS="runsc:runsc-debian" DESTINATION=$$T && \
+ NIGHTLY=$(RELEASE_NIGHTLY) tools/make_release.sh $(RELEASE_KEY) $(RELEASE_ROOT) $$T/*; \
+ rc=$$?; rm -rf $$T; exit $$rc
+.PHONY: release
+
+tag: ## Creates and pushes a release tag.
+ @tools/tag_release.sh "$(RELEASE_COMMIT)" "$(RELEASE_NAME)" "$(RELEASE_NOTES)"
+.PHONY: tag
+
+##
## Development helpers and tooling.
##
## These targets faciliate local development by automatically
@@ -179,6 +227,7 @@ dev: ## Installs a set of local runtimes. Requires sudo.
@$(MAKE) configure RUNTIME="$(RUNTIME)" ARGS="--net-raw"
@$(MAKE) configure RUNTIME="$(RUNTIME)-d" ARGS="--net-raw --debug --strace --log-packets"
@$(MAKE) configure RUNTIME="$(RUNTIME)-p" ARGS="--net-raw --profile"
+ @$(MAKE) configure RUNTIME="$(RUNTIME)-vfs2-d" ARGS="--net-raw --debug --strace --log-packets --vfs2"
@sudo systemctl restart docker
.PHONY: dev
diff --git a/README.md b/README.md
index ce3947907..d72d1dac4 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
![gVisor](g3doc/logo.png)
![](https://github.com/google/gvisor/workflows/Build/badge.svg)
+[![gVisor chat](https://badges.gitter.im/gvisor/community.png)](https://gitter.im/gvisor/community)
## What is gVisor?
diff --git a/benchmarks/tcp/tcp_benchmark.sh b/benchmarks/tcp/tcp_benchmark.sh
index e65801a7b..ef04b4ace 100755
--- a/benchmarks/tcp/tcp_benchmark.sh
+++ b/benchmarks/tcp/tcp_benchmark.sh
@@ -94,6 +94,9 @@ while [ $# -gt 0 ]; do
--cubic)
netstack_opts="${netstack_opts} -cubic"
;;
+ --moderate-recv-buf)
+ netstack_opts="${netstack_opts} -moderate_recv_buf"
+ ;;
--duration)
shift
[ "$#" -le 0 ] && echo "no duration provided" && exit 1
@@ -147,8 +150,9 @@ while [ $# -gt 0 ]; do
echo " --client use netstack as the client"
echo " --ideal reset all network emulation"
echo " --server use netstack as the server"
- echo " --mtu set the mtu (bytes)"
+ echo " --mtu set the mtu (bytes)"
echo " --sack enable SACK support"
+ echo " --moderate-recv-buf enable TCP receive buffer auto-tuning"
echo " --cubic enable CUBIC congestion control for Netstack"
echo " --duration set the test duration (s)"
echo " --latency set the latency (ms)"
diff --git a/benchmarks/tcp/tcp_proxy.go b/benchmarks/tcp/tcp_proxy.go
index dc1593b34..f5aa0b515 100644
--- a/benchmarks/tcp/tcp_proxy.go
+++ b/benchmarks/tcp/tcp_proxy.go
@@ -56,6 +56,7 @@ var (
mask = flag.Int("mask", 8, "mask size for address")
iface = flag.String("iface", "", "network interface name to bind for netstack")
sack = flag.Bool("sack", false, "enable SACK support for netstack")
+ moderateRecvBuf = flag.Bool("moderate_recv_buf", false, "enable TCP Receive Buffer Auto-tuning")
cubic = flag.Bool("cubic", false, "enable use of CUBIC congestion control for netstack")
gso = flag.Int("gso", 0, "GSO maximum size")
swgso = flag.Bool("swgso", false, "software-level GSO")
@@ -231,6 +232,11 @@ func newNetstackImpl(mode string) (impl, error) {
return nil, fmt.Errorf("SetTransportProtocolOption for SACKEnabled failed: %v", err)
}
+ // Enable Receive Buffer Auto-Tuning.
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(*moderateRecvBuf)); err != nil {
+ return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
+ }
+
// Set Congestion Control to cubic if requested.
if *cubic {
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.CongestionControlOption("cubic")); err != nil {
diff --git a/images/jekyll/Dockerfile b/images/jekyll/Dockerfile
index cefd949a6..4860dd750 100644
--- a/images/jekyll/Dockerfile
+++ b/images/jekyll/Dockerfile
@@ -8,5 +8,6 @@ RUN gem install \
jekyll-paginate:1.1.0 \
kramdown-parser-gfm:1.1.0 \
jekyll-relative-links:0.6.1 \
- jekyll-feed:0.13.0
+ jekyll-feed:0.13.0 \
+ jekyll-sitemap:1.4.0
CMD ["/usr/gem/gems/jekyll-4.0.0/exe/jekyll", "build", "-t", "-s", "/input", "-d", "/output"]
diff --git a/pkg/abi/linux/ip.go b/pkg/abi/linux/ip.go
index 31e56ffa6..ef6d1093e 100644
--- a/pkg/abi/linux/ip.go
+++ b/pkg/abi/linux/ip.go
@@ -92,6 +92,16 @@ const (
IP_UNICAST_IF = 50
)
+// IP_MTU_DISCOVER values from uapi/linux/in.h
+const (
+ IP_PMTUDISC_DONT = 0
+ IP_PMTUDISC_WANT = 1
+ IP_PMTUDISC_DO = 2
+ IP_PMTUDISC_PROBE = 3
+ IP_PMTUDISC_INTERFACE = 4
+ IP_PMTUDISC_OMIT = 5
+)
+
// Socket options from uapi/linux/in6.h
const (
IPV6_ADDRFORM = 1
diff --git a/pkg/cleanup/BUILD b/pkg/cleanup/BUILD
new file mode 100644
index 000000000..5c34b9872
--- /dev/null
+++ b/pkg/cleanup/BUILD
@@ -0,0 +1,17 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "cleanup",
+ srcs = ["cleanup.go"],
+ visibility = ["//:sandbox"],
+ deps = [
+ ],
+)
+
+go_test(
+ name = "cleanup_test",
+ srcs = ["cleanup_test.go"],
+ library = ":cleanup",
+)
diff --git a/pkg/cleanup/cleanup.go b/pkg/cleanup/cleanup.go
new file mode 100644
index 000000000..14a05f076
--- /dev/null
+++ b/pkg/cleanup/cleanup.go
@@ -0,0 +1,60 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package cleanup provides utilities to clean "stuff" on defers.
+package cleanup
+
+// Cleanup allows defers to be aborted when cleanup needs to happen
+// conditionally. Usage:
+// cu := cleanup.Make(func() { f.Close() })
+// defer cu.Clean() // failure before release is called will close the file.
+// ...
+// cu.Add(func() { f2.Close() }) // Adds another cleanup function
+// ...
+// cu.Release() // on success, aborts closing the file.
+// return f
+type Cleanup struct {
+ cleaners []func()
+}
+
+// Make creates a new Cleanup object.
+func Make(f func()) Cleanup {
+ return Cleanup{cleaners: []func(){f}}
+}
+
+// Add adds a new function to be called on Clean().
+func (c *Cleanup) Add(f func()) {
+ c.cleaners = append(c.cleaners, f)
+}
+
+// Clean calls all cleanup functions in reverse order.
+func (c *Cleanup) Clean() {
+ clean(c.cleaners)
+ c.cleaners = nil
+}
+
+// Release releases the cleanup from its duties, i.e. cleanup functions are not
+// called after this point. Returns a function that calls all registered
+// functions in case the caller has use for them.
+func (c *Cleanup) Release() func() {
+ old := c.cleaners
+ c.cleaners = nil
+ return func() { clean(old) }
+}
+
+func clean(cleaners []func()) {
+ for i := len(cleaners) - 1; i >= 0; i-- {
+ cleaners[i]()
+ }
+}
diff --git a/pkg/cleanup/cleanup_test.go b/pkg/cleanup/cleanup_test.go
new file mode 100644
index 000000000..ab3d9ed95
--- /dev/null
+++ b/pkg/cleanup/cleanup_test.go
@@ -0,0 +1,66 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cleanup
+
+import "testing"
+
+func testCleanupHelper(clean, cleanAdd *bool, release bool) func() {
+ cu := Make(func() {
+ *clean = true
+ })
+ cu.Add(func() {
+ *cleanAdd = true
+ })
+ defer cu.Clean()
+ if release {
+ return cu.Release()
+ }
+ return nil
+}
+
+func TestCleanup(t *testing.T) {
+ clean := false
+ cleanAdd := false
+ testCleanupHelper(&clean, &cleanAdd, false)
+ if !clean {
+ t.Fatalf("cleanup function was not called.")
+ }
+ if !cleanAdd {
+ t.Fatalf("added cleanup function was not called.")
+ }
+}
+
+func TestRelease(t *testing.T) {
+ clean := false
+ cleanAdd := false
+ cleaner := testCleanupHelper(&clean, &cleanAdd, true)
+
+ // Check that clean was not called after release.
+ if clean {
+ t.Fatalf("cleanup function was called.")
+ }
+ if cleanAdd {
+ t.Fatalf("added cleanup function was called.")
+ }
+
+ // Call the cleaner function and check that both cleanup functions are called.
+ cleaner()
+ if !clean {
+ t.Fatalf("cleanup function was not called.")
+ }
+ if !cleanAdd {
+ t.Fatalf("added cleanup function was not called.")
+ }
+}
diff --git a/pkg/procid/procid_amd64.s b/pkg/procid/procid_amd64.s
index 38cea9be3..7c622e5d7 100644
--- a/pkg/procid/procid_amd64.s
+++ b/pkg/procid/procid_amd64.s
@@ -14,7 +14,7 @@
// +build amd64
// +build go1.8
-// +build !go1.15
+// +build !go1.16
#include "textflag.h"
diff --git a/pkg/procid/procid_arm64.s b/pkg/procid/procid_arm64.s
index 4f4b70fef..48ebb5fd1 100644
--- a/pkg/procid/procid_arm64.s
+++ b/pkg/procid/procid_arm64.s
@@ -14,7 +14,7 @@
// +build arm64
// +build go1.8
-// +build !go1.15
+// +build !go1.16
#include "textflag.h"
diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD
index e74275d2d..0c9a62f0d 100644
--- a/pkg/sentry/control/BUILD
+++ b/pkg/sentry/control/BUILD
@@ -23,6 +23,7 @@ go_library(
"//pkg/sentry/fdimport",
"//pkg/sentry/fs",
"//pkg/sentry/fs/host",
+ "//pkg/sentry/fs/user",
"//pkg/sentry/fsbridge",
"//pkg/sentry/fsimpl/host",
"//pkg/sentry/kernel",
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
index 2ed17ee09..8767430b7 100644
--- a/pkg/sentry/control/proc.go
+++ b/pkg/sentry/control/proc.go
@@ -18,7 +18,6 @@ import (
"bytes"
"encoding/json"
"fmt"
- "path"
"sort"
"strings"
"text/tabwriter"
@@ -28,10 +27,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fdimport"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -190,17 +189,12 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
// transferred to the new process.
initArgs.MountNamespaceVFS2 = proc.Kernel.GlobalInit().Leader().MountNamespaceVFS2()
}
-
- paths := fs.GetPath(initArgs.Envv)
- vfsObj := proc.Kernel.VFS()
- file, err := ResolveExecutablePath(ctx, vfsObj, initArgs.WorkingDirectory, initArgs.Argv[0], paths)
+ file, err := getExecutableFD(ctx, creds, proc.Kernel.VFS(), initArgs.MountNamespaceVFS2, initArgs.Envv, initArgs.WorkingDirectory, initArgs.Argv[0])
if err != nil {
- return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err)
+ return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in environment %v: %v", initArgs.Argv[0], initArgs.Envv, err)
}
initArgs.File = fsbridge.NewVFSFile(file)
} else {
- // Get the full path to the filename from the PATH env variable.
- paths := fs.GetPath(initArgs.Envv)
if initArgs.MountNamespace == nil {
// Set initArgs so that 'ctx' returns the namespace.
initArgs.MountNamespace = proc.Kernel.GlobalInit().Leader().MountNamespace()
@@ -209,9 +203,9 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
// be donated to the new process in CreateProcess.
initArgs.MountNamespace.IncRef()
}
- f, err := initArgs.MountNamespace.ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths)
+ f, err := user.ResolveExecutablePath(ctx, creds, initArgs.MountNamespace, initArgs.Envv, initArgs.WorkingDirectory, initArgs.Argv[0])
if err != nil {
- return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err)
+ return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], initArgs.Envv, err)
}
initArgs.Filename = f
}
@@ -429,53 +423,17 @@ func ttyName(tty *kernel.TTY) string {
return fmt.Sprintf("pts/%d", tty.Index)
}
-// ResolveExecutablePath resolves the given executable name given a set of
-// paths that might contain it.
-func ResolveExecutablePath(ctx context.Context, vfsObj *vfs.VirtualFilesystem, wd, name string, paths []string) (*vfs.FileDescription, error) {
- root := vfs.RootFromContext(ctx)
- defer root.DecRef()
- creds := auth.CredentialsFromContext(ctx)
-
- // Absolute paths can be used directly.
- if path.IsAbs(name) {
- return openExecutable(ctx, vfsObj, creds, root, name)
- }
-
- // Paths with '/' in them should be joined to the working directory, or
- // to the root if working directory is not set.
- if strings.IndexByte(name, '/') > 0 {
- if len(wd) == 0 {
- wd = "/"
- }
- if !path.IsAbs(wd) {
- return nil, fmt.Errorf("working directory %q must be absolute", wd)
- }
- return openExecutable(ctx, vfsObj, creds, root, path.Join(wd, name))
+// getExecutableFD resolves the given executable name and returns a
+// vfs.FileDescription for the executable file.
+func getExecutableFD(ctx context.Context, creds *auth.Credentials, vfsObj *vfs.VirtualFilesystem, mns *vfs.MountNamespace, envv []string, wd, name string) (*vfs.FileDescription, error) {
+ path, err := user.ResolveExecutablePathVFS2(ctx, creds, mns, envv, wd, name)
+ if err != nil {
+ return nil, err
}
- // Otherwise, we must lookup the name in the paths, starting from the
- // calling context's root directory.
- for _, p := range paths {
- if !path.IsAbs(p) {
- // Relative paths aren't safe, no one should be using them.
- log.Warningf("Skipping relative path %q in $PATH", p)
- continue
- }
-
- binPath := path.Join(p, name)
- f, err := openExecutable(ctx, vfsObj, creds, root, binPath)
- if err != nil {
- return nil, err
- }
- if f == nil {
- continue // Not found/no access.
- }
- return f, nil
- }
- return nil, syserror.ENOENT
-}
+ root := vfs.RootFromContext(ctx)
+ defer root.DecRef()
-func openExecutable(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, root vfs.VirtualDentry, path string) (*vfs.FileDescription, error) {
pop := vfs.PathOperation{
Root: root,
Start: root, // binPath is absolute, Start can be anything.
diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go
index 846252c89..2a278fbe3 100644
--- a/pkg/sentry/fs/file.go
+++ b/pkg/sentry/fs/file.go
@@ -310,7 +310,6 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
if !f.mu.Lock(ctx) {
return 0, syserror.ErrInterrupted
}
-
unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
// Handle append mode.
if f.Flags().Append {
@@ -355,7 +354,6 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
// offset."
unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
defer unlockAppendMu()
-
if f.Flags().Append {
if err := f.offsetForAppend(ctx, &offset); err != nil {
return 0, err
@@ -374,9 +372,10 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
return f.FileOperations.Write(ctx, f, src, offset)
}
-// offsetForAppend sets the given offset to the end of the file.
+// offsetForAppend atomically sets the given offset to the end of the file.
//
-// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing.
+// Precondition: the file.Dirent.Inode.appendMu mutex should be held for
+// writing.
func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
uattr, err := f.Dirent.Inode.UnstableAttr(ctx)
if err != nil {
@@ -386,7 +385,7 @@ func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
}
// Update the offset.
- *offset = uattr.Size
+ atomic.StoreInt64(offset, uattr.Size)
return nil
}
diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go
index bdba6efe5..d2dbff268 100644
--- a/pkg/sentry/fs/fs.go
+++ b/pkg/sentry/fs/fs.go
@@ -42,9 +42,10 @@
// Dirent.dirMu
// Dirent.mu
// DirentCache.mu
-// Locks in InodeOperations implementations or overlayEntry
// Inode.Watches.mu (see `Inotify` for other lock ordering)
// MountSource.mu
+// Inode.appendMu
+// Locks in InodeOperations implementations or overlayEntry
//
// If multiple Dirent or MountSource locks must be taken, locks in the parent must be
// taken before locks in their children.
diff --git a/pkg/sentry/fs/g3doc/fuse.md b/pkg/sentry/fs/g3doc/fuse.md
index 635cc009b..2ca84dd74 100644
--- a/pkg/sentry/fs/g3doc/fuse.md
+++ b/pkg/sentry/fs/g3doc/fuse.md
@@ -76,7 +76,8 @@ ops can be implemented in parallel.
#### Minimal client that can mount a trivial FUSE filesystem.
-- Implement `/dev/fuse`.
+- Implement `/dev/fuse` - a character device used to establish an FD for
+ communication between the sentry and the server daemon.
- Implement basic FUSE ops like `FUSE_INIT`, `FUSE_DESTROY`.
@@ -99,7 +100,7 @@ ops can be implemented in parallel.
## FUSE Protocol
The FUSE protocol is a request-response protocol. All requests are initiated by
-the client. The wire-format for the protocol is raw c structs serialized to
+the client. The wire-format for the protocol is raw C structs serialized to
memory.
All FUSE requests begin with the following request header:
@@ -255,6 +256,8 @@ I/O syscalls like `read(2)`, `write(2)` and `mmap(2)`.
# References
-- `fuse(4)` manpage.
-- Linux kernel FUSE documentation:
- https://www.kernel.org/doc/html/latest/filesystems/fuse.html
+- [fuse(4) Linux manual page](https://www.man7.org/linux/man-pages/man4/fuse.4.html)
+- [Linux kernel FUSE documentation](https://www.kernel.org/doc/html/latest/filesystems/fuse.html)
+- [The reference implementation of the Linux FUSE (Filesystem in Userspace)
+ interface](https://github.com/libfuse/libfuse)
+- [The kernel interface of FUSE](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fuse.h)
diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go
index b414ddaee..3f2bd0e87 100644
--- a/pkg/sentry/fs/mounts.go
+++ b/pkg/sentry/fs/mounts.go
@@ -17,13 +17,9 @@ package fs
import (
"fmt"
"math"
- "path"
- "strings"
"syscall"
- "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sync"
@@ -625,71 +621,3 @@ func (mns *MountNamespace) SyncAll(ctx context.Context) {
defer mns.mu.Unlock()
mns.root.SyncAll(ctx)
}
-
-// ResolveExecutablePath resolves the given executable name given a set of
-// paths that might contain it.
-func (mns *MountNamespace) ResolveExecutablePath(ctx context.Context, wd, name string, paths []string) (string, error) {
- // Absolute paths can be used directly.
- if path.IsAbs(name) {
- return name, nil
- }
-
- // Paths with '/' in them should be joined to the working directory, or
- // to the root if working directory is not set.
- if strings.IndexByte(name, '/') > 0 {
- if wd == "" {
- wd = "/"
- }
- if !path.IsAbs(wd) {
- return "", fmt.Errorf("working directory %q must be absolute", wd)
- }
- return path.Join(wd, name), nil
- }
-
- // Otherwise, We must lookup the name in the paths, starting from the
- // calling context's root directory.
- root := RootFromContext(ctx)
- if root == nil {
- // Caller has no root. Don't bother traversing anything.
- return "", syserror.ENOENT
- }
- defer root.DecRef()
- for _, p := range paths {
- binPath := path.Join(p, name)
- traversals := uint(linux.MaxSymlinkTraversals)
- d, err := mns.FindInode(ctx, root, nil, binPath, &traversals)
- if err == syserror.ENOENT || err == syserror.EACCES {
- // Didn't find it here.
- continue
- }
- if err != nil {
- return "", err
- }
- defer d.DecRef()
-
- // Check that it is a regular file.
- if !IsRegular(d.Inode.StableAttr) {
- continue
- }
-
- // Check whether we can read and execute the found file.
- if err := d.Inode.CheckPermission(ctx, PermMask{Read: true, Execute: true}); err != nil {
- log.Infof("Found executable at %q, but user cannot execute it: %v", binPath, err)
- continue
- }
- return path.Join("/", p, name), nil
- }
- return "", syserror.ENOENT
-}
-
-// GetPath returns the PATH as a slice of strings given the environment
-// variables.
-func GetPath(env []string) []string {
- const prefix = "PATH="
- for _, e := range env {
- if strings.HasPrefix(e, prefix) {
- return strings.Split(strings.TrimPrefix(e, prefix), ":")
- }
- }
- return nil
-}
diff --git a/pkg/sentry/fs/user/BUILD b/pkg/sentry/fs/user/BUILD
index f37f979f1..bd5dac373 100644
--- a/pkg/sentry/fs/user/BUILD
+++ b/pkg/sentry/fs/user/BUILD
@@ -4,15 +4,20 @@ package(licenses = ["notice"])
go_library(
name = "user",
- srcs = ["user.go"],
+ srcs = [
+ "path.go",
+ "user.go",
+ ],
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fspath",
+ "//pkg/log",
"//pkg/sentry/fs",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
+ "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fs/user/path.go b/pkg/sentry/fs/user/path.go
new file mode 100644
index 000000000..fbd4547a7
--- /dev/null
+++ b/pkg/sentry/fs/user/path.go
@@ -0,0 +1,169 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package user
+
+import (
+ "fmt"
+ "path"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// ResolveExecutablePath resolves the given executable name given the working
+// dir and environment.
+func ResolveExecutablePath(ctx context.Context, creds *auth.Credentials, mns *fs.MountNamespace, envv []string, wd, name string) (string, error) {
+ // Absolute paths can be used directly.
+ if path.IsAbs(name) {
+ return name, nil
+ }
+
+ // Paths with '/' in them should be joined to the working directory, or
+ // to the root if working directory is not set.
+ if strings.IndexByte(name, '/') > 0 {
+ if wd == "" {
+ wd = "/"
+ }
+ if !path.IsAbs(wd) {
+ return "", fmt.Errorf("working directory %q must be absolute", wd)
+ }
+ return path.Join(wd, name), nil
+ }
+
+ // Otherwise, We must lookup the name in the paths, starting from the
+ // calling context's root directory.
+ paths := getPath(envv)
+
+ root := fs.RootFromContext(ctx)
+ if root == nil {
+ // Caller has no root. Don't bother traversing anything.
+ return "", syserror.ENOENT
+ }
+ defer root.DecRef()
+ for _, p := range paths {
+ if !path.IsAbs(p) {
+ // Relative paths aren't safe, no one should be using them.
+ log.Warningf("Skipping relative path %q in $PATH", p)
+ continue
+ }
+
+ binPath := path.Join(p, name)
+ traversals := uint(linux.MaxSymlinkTraversals)
+ d, err := mns.FindInode(ctx, root, nil, binPath, &traversals)
+ if err == syserror.ENOENT || err == syserror.EACCES {
+ // Didn't find it here.
+ continue
+ }
+ if err != nil {
+ return "", err
+ }
+ defer d.DecRef()
+
+ // Check that it is a regular file.
+ if !fs.IsRegular(d.Inode.StableAttr) {
+ continue
+ }
+
+ // Check whether we can read and execute the found file.
+ if err := d.Inode.CheckPermission(ctx, fs.PermMask{Read: true, Execute: true}); err != nil {
+ log.Infof("Found executable at %q, but user cannot execute it: %v", binPath, err)
+ continue
+ }
+ return path.Join("/", p, name), nil
+ }
+
+ // Couldn't find it.
+ return "", syserror.ENOENT
+}
+
+// ResolveExecutablePathVFS2 resolves the given executable name given the
+// working dir and environment.
+func ResolveExecutablePathVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, envv []string, wd, name string) (string, error) {
+ // Absolute paths can be used directly.
+ if path.IsAbs(name) {
+ return name, nil
+ }
+
+ // Paths with '/' in them should be joined to the working directory, or
+ // to the root if working directory is not set.
+ if strings.IndexByte(name, '/') > 0 {
+ if wd == "" {
+ wd = "/"
+ }
+ if !path.IsAbs(wd) {
+ return "", fmt.Errorf("working directory %q must be absolute", wd)
+ }
+ return path.Join(wd, name), nil
+ }
+
+ // Otherwise, We must lookup the name in the paths, starting from the
+ // calling context's root directory.
+ paths := getPath(envv)
+
+ root := mns.Root()
+ defer root.DecRef()
+ for _, p := range paths {
+ if !path.IsAbs(p) {
+ // Relative paths aren't safe, no one should be using them.
+ log.Warningf("Skipping relative path %q in $PATH", p)
+ continue
+ }
+
+ binPath := path.Join(p, name)
+ pop := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(binPath),
+ FollowFinalSymlink: true,
+ }
+ opts := &vfs.OpenOptions{
+ FileExec: true,
+ Flags: linux.O_RDONLY,
+ }
+ dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts)
+ if err == syserror.ENOENT || err == syserror.EACCES {
+ // Didn't find it here.
+ continue
+ }
+ if err != nil {
+ return "", err
+ }
+ dentry.DecRef()
+
+ return binPath, nil
+ }
+
+ // Couldn't find it.
+ return "", syserror.ENOENT
+}
+
+// getPath returns the PATH as a slice of strings given the environment
+// variables.
+func getPath(env []string) []string {
+ const prefix = "PATH="
+ for _, e := range env {
+ if strings.HasPrefix(e, prefix) {
+ return strings.Split(strings.TrimPrefix(e, prefix), ":")
+ }
+ }
+ return nil
+}
diff --git a/pkg/sentry/fs/user/user.go b/pkg/sentry/fs/user/user.go
index fe7f67c00..f4d525523 100644
--- a/pkg/sentry/fs/user/user.go
+++ b/pkg/sentry/fs/user/user.go
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// Package user contains methods for resolving filesystem paths based on the
+// user and their environment.
package user
import (
diff --git a/pkg/sentry/fsimpl/ext/dentry.go b/pkg/sentry/fsimpl/ext/dentry.go
index bfbd7c3d4..6bd1a9fc6 100644
--- a/pkg/sentry/fsimpl/ext/dentry.go
+++ b/pkg/sentry/fsimpl/ext/dentry.go
@@ -60,3 +60,15 @@ func (d *dentry) DecRef() {
// inode.decRef().
d.inode.decRef()
}
+
+// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
+//
+// TODO(gvisor.dev/issue/1479): Implement inotify.
+func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {}
+
+// Watches implements vfs.DentryImpl.Watches.
+//
+// TODO(gvisor.dev/issue/1479): Implement inotify.
+func (d *dentry) Watches() *vfs.Watches {
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD
index 67e916525..f5f35a3bc 100644
--- a/pkg/sentry/fsimpl/gofer/BUILD
+++ b/pkg/sentry/fsimpl/gofer/BUILD
@@ -35,6 +35,7 @@ go_library(
"fstree.go",
"gofer.go",
"handle.go",
+ "host_named_pipe.go",
"p9file.go",
"regular_file.go",
"socket.go",
@@ -47,6 +48,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fd",
+ "//pkg/fdnotifier",
"//pkg/fspath",
"//pkg/log",
"//pkg/p9",
@@ -71,6 +73,7 @@ go_library(
"//pkg/unet",
"//pkg/usermem",
"//pkg/waiter",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 7f2181216..36e0e1856 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -760,7 +760,7 @@ afterTrailingSymlink:
parent.dirMu.Unlock()
return nil, syserror.EPERM
}
- fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts)
+ fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts, &ds)
parent.dirMu.Unlock()
return fd, err
}
@@ -873,19 +873,37 @@ func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts
if opts.Flags&linux.O_DIRECT != 0 {
return nil, syserror.EINVAL
}
- h, err := openHandle(ctx, d.file, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0)
+ // We assume that the server silently inserts O_NONBLOCK in the open flags
+ // for all named pipes (because all existing gofers do this).
+ //
+ // NOTE(b/133875563): This makes named pipe opens racy, because the
+ // mechanisms for translating nonblocking to blocking opens can only detect
+ // the instantaneous presence of a peer holding the other end of the pipe
+ // open, not whether the pipe was *previously* opened by a peer that has
+ // since closed its end.
+ isBlockingOpenOfNamedPipe := d.fileType() == linux.S_IFIFO && opts.Flags&linux.O_NONBLOCK == 0
+retry:
+ h, err := openHandle(ctx, d.file, ats.MayRead(), ats.MayWrite(), opts.Flags&linux.O_TRUNC != 0)
if err != nil {
+ if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && err == syserror.ENXIO {
+ // An attempt to open a named pipe with O_WRONLY|O_NONBLOCK fails
+ // with ENXIO if opening the same named pipe with O_WRONLY would
+ // block because there are no readers of the pipe.
+ if err := sleepBetweenNamedPipeOpenChecks(ctx); err != nil {
+ return nil, err
+ }
+ goto retry
+ }
return nil, err
}
- seekable := d.fileType() == linux.S_IFREG
- fd := &specialFileFD{
- handle: h,
- seekable: seekable,
+ if isBlockingOpenOfNamedPipe && ats == vfs.MayRead && h.fd >= 0 {
+ if err := blockUntilNonblockingPipeHasWriter(ctx, h.fd); err != nil {
+ h.close(ctx)
+ return nil, err
+ }
}
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
- DenyPRead: !seekable,
- DenyPWrite: !seekable,
- }); err != nil {
+ fd, err := newSpecialFileFD(h, mnt, d, opts.Flags)
+ if err != nil {
h.close(ctx)
return nil, err
}
@@ -894,7 +912,7 @@ func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts
// Preconditions: d.fs.renameMu must be locked. d.dirMu must be locked.
// !d.isSynthetic().
-func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) {
if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
return nil, err
}
@@ -947,6 +965,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
}
return nil, err
}
+ *ds = appendDentry(*ds, child)
// Incorporate the fid that was opened by lcreate.
useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD
if useRegularFileFD {
@@ -959,10 +978,6 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
child.handleWritable = vfs.MayWriteFileWithOpenFlags(opts.Flags)
child.handleMu.Unlock()
}
- // Take a reference on the new dentry to be held by the new file
- // description. (This reference also means that the new dentry is not
- // eligible for caching yet, so we don't need to append to a dentry slice.)
- child.refs = 1
// Insert the dentry into the tree.
d.cacheNewChildLocked(child, name)
if d.cachedMetadataAuthoritative() {
@@ -981,22 +996,16 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
}
childVFSFD = &fd.vfsfd
} else {
- seekable := child.fileType() == linux.S_IFREG
- fd := &specialFileFD{
- handle: handle{
- file: openFile,
- fd: -1,
- },
- seekable: seekable,
+ h := handle{
+ file: openFile,
+ fd: -1,
}
if fdobj != nil {
- fd.handle.fd = int32(fdobj.Release())
+ h.fd = int32(fdobj.Release())
}
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{
- DenyPRead: !seekable,
- DenyPWrite: !seekable,
- }); err != nil {
- fd.handle.close(ctx)
+ fd, err := newSpecialFileFD(h, mnt, child, opts.Flags)
+ if err != nil {
+ h.close(ctx)
return nil, err
}
childVFSFD = &fd.vfsfd
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 131da332f..3f3bd56f0 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1039,6 +1039,18 @@ func (d *dentry) decRefLocked() {
}
}
+// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
+//
+// TODO(gvisor.dev/issue/1479): Implement inotify.
+func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {}
+
+// Watches implements vfs.DentryImpl.Watches.
+//
+// TODO(gvisor.dev/issue/1479): Implement inotify.
+func (d *dentry) Watches() *vfs.Watches {
+ return nil
+}
+
// checkCachingLocked should be called after d's reference count becomes 0 or it
// becomes disowned.
//
diff --git a/pkg/sentry/fsimpl/gofer/host_named_pipe.go b/pkg/sentry/fsimpl/gofer/host_named_pipe.go
new file mode 100644
index 000000000..7294de7d6
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/host_named_pipe.go
@@ -0,0 +1,97 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "fmt"
+ "sync"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Global pipe used by blockUntilNonblockingPipeHasWriter since we can't create
+// pipes after sentry initialization due to syscall filters.
+var (
+ tempPipeMu sync.Mutex
+ tempPipeReadFD int
+ tempPipeWriteFD int
+ tempPipeBuf [1]byte
+)
+
+func init() {
+ var pipeFDs [2]int
+ if err := unix.Pipe(pipeFDs[:]); err != nil {
+ panic(fmt.Sprintf("failed to create pipe for gofer.blockUntilNonblockingPipeHasWriter: %v", err))
+ }
+ tempPipeReadFD = pipeFDs[0]
+ tempPipeWriteFD = pipeFDs[1]
+}
+
+func blockUntilNonblockingPipeHasWriter(ctx context.Context, fd int32) error {
+ for {
+ ok, err := nonblockingPipeHasWriter(fd)
+ if err != nil {
+ return err
+ }
+ if ok {
+ return nil
+ }
+ if err := sleepBetweenNamedPipeOpenChecks(ctx); err != nil {
+ return err
+ }
+ }
+}
+
+func nonblockingPipeHasWriter(fd int32) (bool, error) {
+ tempPipeMu.Lock()
+ defer tempPipeMu.Unlock()
+ // Copy 1 byte from fd into the temporary pipe.
+ n, err := unix.Tee(int(fd), tempPipeWriteFD, 1, unix.SPLICE_F_NONBLOCK)
+ if err == syserror.EAGAIN {
+ // The pipe represented by fd is empty, but has a writer.
+ return true, nil
+ }
+ if err != nil {
+ return false, err
+ }
+ if n == 0 {
+ // The pipe represented by fd is empty and has no writer.
+ return false, nil
+ }
+ // The pipe represented by fd is non-empty, so it either has, or has
+ // previously had, a writer. Remove the byte copied to the temporary pipe
+ // before returning.
+ if n, err := unix.Read(tempPipeReadFD, tempPipeBuf[:]); err != nil || n != 1 {
+ panic(fmt.Sprintf("failed to drain pipe for gofer.blockUntilNonblockingPipeHasWriter: got (%d, %v), wanted (1, nil)", n, err))
+ }
+ return true, nil
+}
+
+func sleepBetweenNamedPipeOpenChecks(ctx context.Context) error {
+ t := time.NewTimer(100 * time.Millisecond)
+ defer t.Stop()
+ cancel := ctx.SleepStart()
+ select {
+ case <-t.C:
+ ctx.SleepFinish(true)
+ return nil
+ case <-cancel:
+ ctx.SleepFinish(false)
+ return syserror.ErrInterrupted
+ }
+}
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index a464e6a94..ff6126b87 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -19,17 +19,18 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
-// specialFileFD implements vfs.FileDescriptionImpl for files other than
-// regular files, directories, and symlinks: pipes, sockets, etc. It is also
-// used for regular files when filesystemOptions.specialRegularFiles is in
-// effect. specialFileFD differs from regularFileFD by using per-FD handles
-// instead of shared per-dentry handles, and never buffering I/O.
+// specialFileFD implements vfs.FileDescriptionImpl for pipes, sockets, device
+// special files, and (when filesystemOptions.specialRegularFiles is in effect)
+// regular files. specialFileFD differs from regularFileFD by using per-FD
+// handles instead of shared per-dentry handles, and never buffering I/O.
type specialFileFD struct {
fileDescription
@@ -40,13 +41,47 @@ type specialFileFD struct {
// file offset is significant, i.e. a regular file. seekable is immutable.
seekable bool
+ // mayBlock is true if this file description represents a file for which
+ // queue may send I/O readiness events. mayBlock is immutable.
+ mayBlock bool
+ queue waiter.Queue
+
// If seekable is true, off is the file offset. off is protected by mu.
mu sync.Mutex
off int64
}
+func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) {
+ ftype := d.fileType()
+ seekable := ftype == linux.S_IFREG
+ mayBlock := ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK
+ fd := &specialFileFD{
+ handle: h,
+ seekable: seekable,
+ mayBlock: mayBlock,
+ }
+ if mayBlock && h.fd >= 0 {
+ if err := fdnotifier.AddFD(h.fd, &fd.queue); err != nil {
+ return nil, err
+ }
+ }
+ if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
+ DenyPRead: !seekable,
+ DenyPWrite: !seekable,
+ }); err != nil {
+ if mayBlock && h.fd >= 0 {
+ fdnotifier.RemoveFD(h.fd)
+ }
+ return nil, err
+ }
+ return fd, nil
+}
+
// Release implements vfs.FileDescriptionImpl.Release.
func (fd *specialFileFD) Release() {
+ if fd.mayBlock && fd.handle.fd >= 0 {
+ fdnotifier.RemoveFD(fd.handle.fd)
+ }
fd.handle.close(context.Background())
fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
fs.syncMu.Lock()
@@ -62,6 +97,32 @@ func (fd *specialFileFD) OnClose(ctx context.Context) error {
return fd.handle.file.flush(ctx)
}
+// Readiness implements waiter.Waitable.Readiness.
+func (fd *specialFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
+ if fd.mayBlock {
+ return fdnotifier.NonBlockingPoll(fd.handle.fd, mask)
+ }
+ return fd.fileDescription.Readiness(mask)
+}
+
+// EventRegister implements waiter.Waitable.EventRegister.
+func (fd *specialFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+ if fd.mayBlock {
+ fd.queue.EventRegister(e, mask)
+ return
+ }
+ fd.fileDescription.EventRegister(e, mask)
+}
+
+// EventUnregister implements waiter.Waitable.EventUnregister.
+func (fd *specialFileFD) EventUnregister(e *waiter.Entry) {
+ if fd.mayBlock {
+ fd.queue.EventUnregister(e)
+ return
+ }
+ fd.fileDescription.EventUnregister(e)
+}
+
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
if fd.seekable && offset < 0 {
@@ -81,6 +142,9 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
}
buf := make([]byte, dst.NumBytes())
n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
+ if err == syserror.EAGAIN {
+ err = syserror.ErrWouldBlock
+ }
if n == 0 {
return 0, err
}
@@ -130,6 +194,9 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
return 0, err
}
n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
+ if err == syserror.EAGAIN {
+ err = syserror.ErrWouldBlock
+ }
return int64(n), err
}
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index a83151ad3..bbee8ccda 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -225,9 +225,21 @@ func (d *Dentry) destroy() {
}
}
+// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
+//
+// TODO(gvisor.dev/issue/1479): Implement inotify.
+func (d *Dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {}
+
+// Watches implements vfs.DentryImpl.Watches.
+//
+// TODO(gvisor.dev/issue/1479): Implement inotify.
+func (d *Dentry) Watches() *vfs.Watches {
+ return nil
+}
+
// InsertChild inserts child into the vfs dentry cache with the given name under
// this dentry. This does not update the directory inode, so calling this on
-// it's own isn't sufficient to insert a child into a directory. InsertChild
+// its own isn't sufficient to insert a child into a directory. InsertChild
// updates the link count on d if required.
//
// Precondition: d must represent a directory inode.
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
index 007be1572..062321cbc 100644
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -59,6 +59,7 @@ go_library(
"//pkg/sentry/pgalloc",
"//pkg/sentry/platform",
"//pkg/sentry/socket/unix/transport",
+ "//pkg/sentry/uniqueid",
"//pkg/sentry/usage",
"//pkg/sentry/vfs",
"//pkg/sentry/vfs/lock",
diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go
index f2399981b..70387cb9c 100644
--- a/pkg/sentry/fsimpl/tmpfs/directory.go
+++ b/pkg/sentry/fsimpl/tmpfs/directory.go
@@ -79,6 +79,7 @@ func (dir *directory) removeChildLocked(child *dentry) {
dir.iterMu.Lock()
dir.childList.Remove(child)
dir.iterMu.Unlock()
+ child.unlinked = true
}
type directoryFD struct {
@@ -112,6 +113,7 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba
dir.iterMu.Lock()
defer dir.iterMu.Unlock()
+ fd.dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent)
fd.inode().touchAtime(fd.vfsfd.Mount())
if fd.off == 0 {
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 80fa7b29d..183eb975c 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -177,6 +177,12 @@ func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(pa
if err := create(parentDir, name); err != nil {
return err
}
+
+ ev := linux.IN_CREATE
+ if dir {
+ ev |= linux.IN_ISDIR
+ }
+ parentDir.inode.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent)
parentDir.inode.touchCMtime()
return nil
}
@@ -241,6 +247,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return syserror.EMLINK
}
d.inode.incLinksLocked()
+ d.inode.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent)
parentDir.insertChildLocked(fs.newDentry(d.inode), name)
return nil
})
@@ -354,6 +361,7 @@ afterTrailingSymlink:
if err != nil {
return nil, err
}
+ parentDir.inode.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent)
parentDir.inode.touchCMtime()
return fd, nil
}
@@ -559,6 +567,8 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
newParentDir.inode.touchCMtime()
}
renamed.inode.touchCtime()
+
+ vfs.InotifyRename(ctx, &renamed.inode.watches, &oldParentDir.inode.watches, &newParentDir.inode.watches, oldName, newName, renamed.inode.isDir())
return nil
}
@@ -603,8 +613,11 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
parentDir.removeChildLocked(child)
- parentDir.inode.decLinksLocked() // from child's ".."
+ parentDir.inode.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent)
+ // Remove links for child, child/., and child/..
child.inode.decLinksLocked()
+ child.inode.decLinksLocked()
+ parentDir.inode.decLinksLocked()
vfsObj.CommitDeleteDentry(&child.vfsd)
parentDir.inode.touchCMtime()
return nil
@@ -618,7 +631,14 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
if err != nil {
return err
}
- return d.inode.setStat(ctx, rp.Credentials(), &opts.Stat)
+ if err := d.inode.setStat(ctx, rp.Credentials(), &opts.Stat); err != nil {
+ return err
+ }
+
+ if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
+ d.InotifyWithParent(ev, 0, vfs.InodeEvent)
+ }
+ return nil
}
// StatAt implements vfs.FilesystemImpl.StatAt.
@@ -698,6 +718,12 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
return err
}
+
+ // Generate inotify events. Note that this must take place before the link
+ // count of the child is decremented, or else the watches may be dropped
+ // before these events are added.
+ vfs.InotifyRemoveChild(&child.inode.watches, &parentDir.inode.watches, name)
+
parentDir.removeChildLocked(child)
child.inode.decLinksLocked()
vfsObj.CommitDeleteDentry(&child.vfsd)
@@ -754,7 +780,12 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
if err != nil {
return err
}
- return d.inode.setxattr(rp.Credentials(), &opts)
+ if err := d.inode.setxattr(rp.Credentials(), &opts); err != nil {
+ return err
+ }
+
+ d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent)
+ return nil
}
// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
@@ -765,7 +796,12 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath,
if err != nil {
return err
}
- return d.inode.removexattr(rp.Credentials(), name)
+ if err := d.inode.removexattr(rp.Credentials(), name); err != nil {
+ return err
+ }
+
+ d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent)
+ return nil
}
// PrependPath implements vfs.FilesystemImpl.PrependPath.
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 1e781aecd..f0e098702 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -163,6 +163,11 @@ type dentry struct {
// filesystem.mu.
name string
+ // unlinked indicates whether this dentry has been unlinked from its parent.
+ // It is only set to true on an unlink operation, and never set from true to
+ // false. unlinked is protected by filesystem.mu.
+ unlinked bool
+
// dentryEntry (ugh) links dentries into their parent directory.childList.
dentryEntry
@@ -201,6 +206,26 @@ func (d *dentry) DecRef() {
d.inode.decRef()
}
+// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
+func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {
+ if d.inode.isDir() {
+ events |= linux.IN_ISDIR
+ }
+
+ // The ordering below is important, Linux always notifies the parent first.
+ if d.parent != nil {
+ // Note that d.parent or d.name may be stale if there is a concurrent
+ // rename operation. Inotify does not provide consistency guarantees.
+ d.parent.inode.watches.NotifyWithExclusions(d.name, events, cookie, et, d.unlinked)
+ }
+ d.inode.watches.Notify("", events, cookie, et)
+}
+
+// Watches implements vfs.DentryImpl.Watches.
+func (d *dentry) Watches() *vfs.Watches {
+ return &d.inode.watches
+}
+
// inode represents a filesystem object.
type inode struct {
// fs is the owning filesystem. fs is immutable.
@@ -209,11 +234,9 @@ type inode struct {
// refs is a reference count. refs is accessed using atomic memory
// operations.
//
- // A reference is held on all inodes that are reachable in the filesystem
- // tree. For non-directories (which may have multiple hard links), this
- // means that a reference is dropped when nlink reaches 0. For directories,
- // nlink never reaches 0 due to the "." entry; instead,
- // filesystem.RmdirAt() drops the reference.
+ // A reference is held on all inodes as long as they are reachable in the
+ // filesystem tree, i.e. nlink is nonzero. This reference is dropped when
+ // nlink reaches 0.
refs int64
// xattrs implements extended attributes.
@@ -238,6 +261,9 @@ type inode struct {
// Advisory file locks, which lock at the inode level.
locks lock.FileLocks
+ // Inotify watches for this inode.
+ watches vfs.Watches
+
impl interface{} // immutable
}
@@ -259,6 +285,7 @@ func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials,
i.ctime = now
i.mtime = now
// i.nlink initialized by caller
+ i.watches = vfs.Watches{}
i.impl = impl
}
@@ -276,14 +303,17 @@ func (i *inode) incLinksLocked() {
atomic.AddUint32(&i.nlink, 1)
}
-// decLinksLocked decrements i's link count.
+// decLinksLocked decrements i's link count. If the link count reaches 0, we
+// remove a reference on i as well.
//
// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
func (i *inode) decLinksLocked() {
if i.nlink == 0 {
panic("tmpfs.inode.decLinksLocked() called with no existing links")
}
- atomic.AddUint32(&i.nlink, ^uint32(0))
+ if atomic.AddUint32(&i.nlink, ^uint32(0)) == 0 {
+ i.decRef()
+ }
}
func (i *inode) incRef() {
@@ -306,6 +336,7 @@ func (i *inode) tryIncRef() bool {
func (i *inode) decRef() {
if refs := atomic.AddInt64(&i.refs, -1); refs == 0 {
+ i.watches.HandleDeletion()
if regFile, ok := i.impl.(*regularFile); ok {
// Release memory used by regFile to store data. Since regFile is
// no longer usable, we don't need to grab any locks or update any
@@ -627,8 +658,12 @@ func (fd *fileDescription) filesystem() *filesystem {
return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
}
+func (fd *fileDescription) dentry() *dentry {
+ return fd.vfsfd.Dentry().Impl().(*dentry)
+}
+
func (fd *fileDescription) inode() *inode {
- return fd.vfsfd.Dentry().Impl().(*dentry).inode
+ return fd.dentry().inode
}
// Stat implements vfs.FileDescriptionImpl.Stat.
@@ -641,7 +676,15 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
// SetStat implements vfs.FileDescriptionImpl.SetStat.
func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
creds := auth.CredentialsFromContext(ctx)
- return fd.inode().setStat(ctx, creds, &opts.Stat)
+ d := fd.dentry()
+ if err := d.inode.setStat(ctx, creds, &opts.Stat); err != nil {
+ return err
+ }
+
+ if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
+ d.InotifyWithParent(ev, 0, vfs.InodeEvent)
+ }
+ return nil
}
// Listxattr implements vfs.FileDescriptionImpl.Listxattr.
@@ -656,12 +699,26 @@ func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOption
// Setxattr implements vfs.FileDescriptionImpl.Setxattr.
func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error {
- return fd.inode().setxattr(auth.CredentialsFromContext(ctx), &opts)
+ d := fd.dentry()
+ if err := d.inode.setxattr(auth.CredentialsFromContext(ctx), &opts); err != nil {
+ return err
+ }
+
+ // Generate inotify events.
+ d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent)
+ return nil
}
// Removexattr implements vfs.FileDescriptionImpl.Removexattr.
func (fd *fileDescription) Removexattr(ctx context.Context, name string) error {
- return fd.inode().removexattr(auth.CredentialsFromContext(ctx), name)
+ d := fd.dentry()
+ if err := d.inode.removexattr(auth.CredentialsFromContext(ctx), name); err != nil {
+ return err
+ }
+
+ // Generate inotify events.
+ d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent)
+ return nil
}
// NewMemfd creates a new tmpfs regular file and file description that can back
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index ed40b5303..dbfcef0fa 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -152,7 +152,13 @@ func (f *FDTable) drop(file *fs.File) {
// dropVFS2 drops the table reference.
func (f *FDTable) dropVFS2(file *vfs.FileDescription) {
// TODO(gvisor.dev/issue/1480): Release locks.
- // TODO(gvisor.dev/issue/1479): Send inotify events.
+
+ // Generate inotify events.
+ ev := uint32(linux.IN_CLOSE_NOWRITE)
+ if file.IsWritable() {
+ ev = linux.IN_CLOSE_WRITE
+ }
+ file.Dentry().InotifyWithParent(ev, 0, vfs.PathEvent)
// Drop the table reference.
file.DecRef()
diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go
index 9add7c944..2407014e9 100644
--- a/pkg/sentry/platform/kvm/bluepill_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.12
-// +build !go1.15
+// +build !go1.16
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index e42505542..750751aa3 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -60,6 +60,12 @@ func rdonlyRegionsForSetMem() (phyRegions []physicalRegion) {
if !vr.accessType.Write && vr.accessType.Read {
rdonlyRegions = append(rdonlyRegions, vr.region)
}
+
+ // TODO(gvisor.dev/issue/2686): PROT_NONE should be specially treated.
+ // Workaround: treated as rdonly temporarily.
+ if !vr.accessType.Write && !vr.accessType.Read && !vr.accessType.Execute {
+ rdonlyRegions = append(rdonlyRegions, vr.region)
+ }
})
for _, r := range rdonlyRegions {
diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go
index f04be2ab5..de7df4f80 100644
--- a/pkg/sentry/platform/kvm/machine_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.12
-// +build !go1.15
+// +build !go1.16
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/sentry/platform/ptrace/subprocess_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_unsafe.go
index 2ae6b9f9d..0bee995e4 100644
--- a/pkg/sentry/platform/ptrace/subprocess_unsafe.go
+++ b/pkg/sentry/platform/ptrace/subprocess_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.12
-// +build !go1.15
+// +build !go1.16
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index 789bb94c8..66015e2bc 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -64,6 +64,8 @@ const enableLogging = false
var emptyFilter = stack.IPHeaderFilter{
Dst: "\x00\x00\x00\x00",
DstMask: "\x00\x00\x00\x00",
+ Src: "\x00\x00\x00\x00",
+ SrcMask: "\x00\x00\x00\x00",
}
// nflog logs messages related to the writing and reading of iptables.
@@ -142,31 +144,27 @@ func GetEntries(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen
}
func findTable(stk *stack.Stack, tablename linux.TableName) (stack.Table, error) {
- ipt := stk.IPTables()
- table, ok := ipt.Tables[tablename.String()]
+ table, ok := stk.IPTables().GetTable(tablename.String())
if !ok {
return stack.Table{}, fmt.Errorf("couldn't find table %q", tablename)
}
return table, nil
}
-// FillDefaultIPTables sets stack's IPTables to the default tables and
-// populates them with metadata.
-func FillDefaultIPTables(stk *stack.Stack) {
- ipt := stack.DefaultTables()
-
- // In order to fill in the metadata, we have to translate ipt from its
- // netstack format to Linux's giant-binary-blob format.
- for name, table := range ipt.Tables {
- _, metadata, err := convertNetstackToBinary(name, table)
- if err != nil {
- panic(fmt.Errorf("Unable to set default IP tables: %v", err))
+// FillIPTablesMetadata populates stack's IPTables with metadata.
+func FillIPTablesMetadata(stk *stack.Stack) {
+ stk.IPTables().ModifyTables(func(tables map[string]stack.Table) {
+ // In order to fill in the metadata, we have to translate ipt from its
+ // netstack format to Linux's giant-binary-blob format.
+ for name, table := range tables {
+ _, metadata, err := convertNetstackToBinary(name, table)
+ if err != nil {
+ panic(fmt.Errorf("Unable to set default IP tables: %v", err))
+ }
+ table.SetMetadata(metadata)
+ tables[name] = table
}
- table.SetMetadata(metadata)
- ipt.Tables[name] = table
- }
-
- stk.SetIPTables(ipt)
+ })
}
// convertNetstackToBinary converts the iptables as stored in netstack to the
@@ -214,11 +212,16 @@ func convertNetstackToBinary(tablename string, table stack.Table) (linux.KernelI
}
copy(entry.IPTEntry.IP.Dst[:], rule.Filter.Dst)
copy(entry.IPTEntry.IP.DstMask[:], rule.Filter.DstMask)
+ copy(entry.IPTEntry.IP.Src[:], rule.Filter.Src)
+ copy(entry.IPTEntry.IP.SrcMask[:], rule.Filter.SrcMask)
copy(entry.IPTEntry.IP.OutputInterface[:], rule.Filter.OutputInterface)
copy(entry.IPTEntry.IP.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask)
if rule.Filter.DstInvert {
entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_DSTIP
}
+ if rule.Filter.SrcInvert {
+ entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_SRCIP
+ }
if rule.Filter.OutputInterfaceInvert {
entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_VIA_OUT
}
@@ -566,15 +569,13 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
// - There are no chains without an unconditional final rule.
// - There are no chains without an unconditional underflow rule.
- ipt := stk.IPTables()
table.SetMetadata(metadata{
HookEntry: replace.HookEntry,
Underflow: replace.Underflow,
NumEntries: replace.NumEntries,
Size: replace.Size,
})
- ipt.Tables[replace.Name.String()] = table
- stk.SetIPTables(ipt)
+ stk.IPTables().ReplaceTable(replace.Name.String(), table)
return nil
}
@@ -737,6 +738,9 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) {
if len(iptip.Dst) != header.IPv4AddressSize || len(iptip.DstMask) != header.IPv4AddressSize {
return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask))
}
+ if len(iptip.Src) != header.IPv4AddressSize || len(iptip.SrcMask) != header.IPv4AddressSize {
+ return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of source (%d) and/or source mask (%d) fields", len(iptip.Src), len(iptip.SrcMask))
+ }
n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0)
if n == -1 {
@@ -755,6 +759,9 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) {
Dst: tcpip.Address(iptip.Dst[:]),
DstMask: tcpip.Address(iptip.DstMask[:]),
DstInvert: iptip.InverseFlags&linux.IPT_INV_DSTIP != 0,
+ Src: tcpip.Address(iptip.Src[:]),
+ SrcMask: tcpip.Address(iptip.SrcMask[:]),
+ SrcInvert: iptip.InverseFlags&linux.IPT_INV_SRCIP != 0,
OutputInterface: ifname,
OutputInterfaceMask: ifnameMask,
OutputInterfaceInvert: iptip.InverseFlags&linux.IPT_INV_VIA_OUT != 0,
@@ -765,15 +772,13 @@ func containsUnsupportedFields(iptip linux.IPTIP) bool {
// The following features are supported:
// - Protocol
// - Dst and DstMask
+ // - Src and SrcMask
// - The inverse destination IP check flag
// - OutputInterface, OutputInterfaceMask and its inverse.
- var emptyInetAddr = linux.InetAddr{}
var emptyInterface = [linux.IFNAMSIZ]byte{}
// Disable any supported inverse flags.
- inverseMask := uint8(linux.IPT_INV_DSTIP) | uint8(linux.IPT_INV_VIA_OUT)
- return iptip.Src != emptyInetAddr ||
- iptip.SrcMask != emptyInetAddr ||
- iptip.InputInterface != emptyInterface ||
+ inverseMask := uint8(linux.IPT_INV_DSTIP) | uint8(linux.IPT_INV_SRCIP) | uint8(linux.IPT_INV_VIA_OUT)
+ return iptip.InputInterface != emptyInterface ||
iptip.InputInterfaceMask != emptyInterface ||
iptip.Flags != 0 ||
iptip.InverseFlags&^inverseMask != 0
diff --git a/pkg/sentry/socket/netfilter/owner_matcher.go b/pkg/sentry/socket/netfilter/owner_matcher.go
index 3863293c7..1b4e0ad79 100644
--- a/pkg/sentry/socket/netfilter/owner_matcher.go
+++ b/pkg/sentry/socket/netfilter/owner_matcher.go
@@ -111,7 +111,7 @@ func (*OwnerMatcher) Name() string {
}
// Match implements Matcher.Match.
-func (om *OwnerMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interfaceName string) (bool, bool) {
+func (om *OwnerMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceName string) (bool, bool) {
// Support only for OUTPUT chain.
// TODO(gvisor.dev/issue/170): Need to support for POSTROUTING chain also.
if hook != stack.Output {
diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go
index 57a1e1c12..ebabdf334 100644
--- a/pkg/sentry/socket/netfilter/tcp_matcher.go
+++ b/pkg/sentry/socket/netfilter/tcp_matcher.go
@@ -96,7 +96,7 @@ func (*TCPMatcher) Name() string {
}
// Match implements Matcher.Match.
-func (tm *TCPMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interfaceName string) (bool, bool) {
+func (tm *TCPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceName string) (bool, bool) {
netHeader := header.IPv4(pkt.NetworkHeader)
if netHeader.TransportProtocol() != header.TCPProtocolNumber {
diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go
index cfa9e621d..98b9943f8 100644
--- a/pkg/sentry/socket/netfilter/udp_matcher.go
+++ b/pkg/sentry/socket/netfilter/udp_matcher.go
@@ -93,7 +93,7 @@ func (*UDPMatcher) Name() string {
}
// Match implements Matcher.Match.
-func (um *UDPMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interfaceName string) (bool, bool) {
+func (um *UDPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceName string) (bool, bool) {
netHeader := header.IPv4(pkt.NetworkHeader)
// TODO(gvisor.dev/issue/170): Proto checks should ultimately be moved
diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go
index f5fa18136..9b44c2b89 100644
--- a/pkg/sentry/socket/netstack/stack.go
+++ b/pkg/sentry/socket/netstack/stack.go
@@ -362,14 +362,13 @@ func (s *Stack) RouteTable() []inet.Route {
}
// IPTables returns the stack's iptables.
-func (s *Stack) IPTables() (stack.IPTables, error) {
+func (s *Stack) IPTables() (*stack.IPTables, error) {
return s.Stack.IPTables(), nil
}
-// FillDefaultIPTables sets the stack's iptables to the default tables, which
-// allow and do not modify all traffic.
-func (s *Stack) FillDefaultIPTables() {
- netfilter.FillDefaultIPTables(s.Stack)
+// FillIPTablesMetadata populates stack's IPTables with metadata.
+func (s *Stack) FillIPTablesMetadata() {
+ netfilter.FillIPTablesMetadata(s.Stack)
}
// Resume implements inet.Stack.Resume.
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index ce5b94ee7..09c6d3b27 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -252,7 +252,7 @@ func (e *connectionedEndpoint) Close() {
// BidirectionalConnect implements BoundEndpoint.BidirectionalConnect.
func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error {
if ce.Type() != e.stype {
- return syserr.ErrConnectionRefused
+ return syserr.ErrWrongProtocolForSocket
}
// Check if ce is e to avoid a deadlock.
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 5b29e9d7f..c4c9db81b 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -417,7 +417,18 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool
defer ep.Release()
// Connect the server endpoint.
- return s.ep.Connect(t, ep)
+ err = s.ep.Connect(t, ep)
+
+ if err == syserr.ErrWrongProtocolForSocket {
+ // Linux for abstract sockets returns ErrConnectionRefused
+ // instead of ErrWrongProtocolForSocket.
+ path, _ := extractPath(sockaddr)
+ if len(path) > 0 && path[0] == 0 {
+ err = syserr.ErrConnectionRefused
+ }
+ }
+
+ return err
}
// Write implements fs.FileOperations.Write.
diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go
index 39f2b79ec..77c78889d 100644
--- a/pkg/sentry/syscalls/linux/sys_splice.go
+++ b/pkg/sentry/syscalls/linux/sys_splice.go
@@ -80,6 +80,12 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB
}
}
+ if total > 0 {
+ // On Linux, inotify behavior is not very consistent with splice(2). We try
+ // our best to emulate Linux for very basic calls to splice, where for some
+ // reason, events are generated for output files, but not input files.
+ outFile.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
+ }
return total, err
}
diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go
index 2de5e3422..c24946160 100644
--- a/pkg/sentry/syscalls/linux/sys_xattr.go
+++ b/pkg/sentry/syscalls/linux/sys_xattr.go
@@ -207,7 +207,11 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, si
return syserror.EOPNOTSUPP
}
- return d.Inode.SetXattr(t, d, name, value, flags)
+ if err := d.Inode.SetXattr(t, d, name, value, flags); err != nil {
+ return err
+ }
+ d.InotifyEvent(linux.IN_ATTRIB, 0)
+ return nil
}
func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) {
@@ -418,7 +422,11 @@ func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr) error {
return syserror.EOPNOTSUPP
}
- return d.Inode.RemoveXattr(t, d, name)
+ if err := d.Inode.RemoveXattr(t, d, name); err != nil {
+ return err
+ }
+ d.InotifyEvent(linux.IN_ATTRIB, 0)
+ return nil
}
// LINT.ThenChange(vfs2/xattr.go)
diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD
index d56927ff5..9c8b44f64 100644
--- a/pkg/sentry/syscalls/linux/vfs2/BUILD
+++ b/pkg/sentry/syscalls/linux/vfs2/BUILD
@@ -12,6 +12,7 @@ go_library(
"filesystem.go",
"fscontext.go",
"getdents.go",
+ "inotify.go",
"ioctl.go",
"memfd.go",
"mmap.go",
diff --git a/pkg/sentry/syscalls/linux/vfs2/inotify.go b/pkg/sentry/syscalls/linux/vfs2/inotify.go
new file mode 100644
index 000000000..7d50b6a16
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/vfs2/inotify.go
@@ -0,0 +1,134 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs2
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+const allFlags = linux.IN_NONBLOCK | linux.IN_CLOEXEC
+
+// InotifyInit1 implements the inotify_init1() syscalls.
+func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ flags := args[0].Int()
+ if flags&^allFlags != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+
+ ino, err := vfs.NewInotifyFD(t, t.Kernel().VFS(), uint32(flags))
+ if err != nil {
+ return 0, nil, err
+ }
+ defer ino.DecRef()
+
+ fd, err := t.NewFDFromVFS2(0, ino, kernel.FDFlags{
+ CloseOnExec: flags&linux.IN_CLOEXEC != 0,
+ })
+
+ if err != nil {
+ return 0, nil, err
+ }
+
+ return uintptr(fd), nil, nil
+}
+
+// InotifyInit implements the inotify_init() syscalls.
+func InotifyInit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ args[0].Value = 0
+ return InotifyInit1(t, args)
+}
+
+// fdToInotify resolves an fd to an inotify object. If successful, the file will
+// have an extra ref and the caller is responsible for releasing the ref.
+func fdToInotify(t *kernel.Task, fd int32) (*vfs.Inotify, *vfs.FileDescription, error) {
+ f := t.GetFileVFS2(fd)
+ if f == nil {
+ // Invalid fd.
+ return nil, nil, syserror.EBADF
+ }
+
+ ino, ok := f.Impl().(*vfs.Inotify)
+ if !ok {
+ // Not an inotify fd.
+ f.DecRef()
+ return nil, nil, syserror.EINVAL
+ }
+
+ return ino, f, nil
+}
+
+// InotifyAddWatch implements the inotify_add_watch() syscall.
+func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ addr := args[1].Pointer()
+ mask := args[2].Uint()
+
+ // "EINVAL: The given event mask contains no valid events."
+ // -- inotify_add_watch(2)
+ if validBits := mask & linux.ALL_INOTIFY_BITS; validBits == 0 {
+ return 0, nil, syserror.EINVAL
+ }
+
+ // "IN_DONT_FOLLOW: Don't dereference pathname if it is a symbolic link."
+ // -- inotify(7)
+ follow := followFinalSymlink
+ if mask&linux.IN_DONT_FOLLOW == 0 {
+ follow = nofollowFinalSymlink
+ }
+
+ ino, f, err := fdToInotify(t, fd)
+ if err != nil {
+ return 0, nil, err
+ }
+ defer f.DecRef()
+
+ path, err := copyInPath(t, addr)
+ if err != nil {
+ return 0, nil, err
+ }
+ if mask&linux.IN_ONLYDIR != 0 {
+ path.Dir = true
+ }
+ tpop, err := getTaskPathOperation(t, linux.AT_FDCWD, path, disallowEmptyPath, follow)
+ if err != nil {
+ return 0, nil, err
+ }
+ defer tpop.Release()
+ d, err := t.Kernel().VFS().GetDentryAt(t, t.Credentials(), &tpop.pop, &vfs.GetDentryOptions{})
+ if err != nil {
+ return 0, nil, err
+ }
+ defer d.DecRef()
+
+ fd = ino.AddWatch(d.Dentry(), mask)
+ return uintptr(fd), nil, err
+}
+
+// InotifyRmWatch implements the inotify_rm_watch() syscall.
+func InotifyRmWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ wd := args[1].Int()
+
+ ino, f, err := fdToInotify(t, fd)
+ if err != nil {
+ return 0, nil, err
+ }
+ defer f.DecRef()
+ return 0, nil, ino.RmWatch(wd)
+}
diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go
index 3a7ef24f5..7f9debd4a 100644
--- a/pkg/sentry/syscalls/linux/vfs2/read_write.go
+++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go
@@ -93,11 +93,17 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
n, err := file.Read(t, dst, opts)
if err != syserror.ErrWouldBlock {
+ if n > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent)
+ }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
+ if n > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent)
+ }
return n, err
}
@@ -128,6 +134,9 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt
}
file.EventUnregister(&w)
+ if total > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent)
+ }
return total, err
}
@@ -248,11 +257,17 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
n, err := file.PRead(t, dst, offset, opts)
if err != syserror.ErrWouldBlock {
+ if n > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent)
+ }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
+ if n > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent)
+ }
return n, err
}
@@ -283,6 +298,9 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of
}
file.EventUnregister(&w)
+ if total > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent)
+ }
return total, err
}
@@ -345,11 +363,17 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
n, err := file.Write(t, src, opts)
if err != syserror.ErrWouldBlock {
+ if n > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent)
+ }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
+ if n > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent)
+ }
return n, err
}
@@ -380,6 +404,9 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op
}
file.EventUnregister(&w)
+ if total > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent)
+ }
return total, err
}
@@ -500,11 +527,17 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
n, err := file.PWrite(t, src, offset, opts)
if err != syserror.ErrWouldBlock {
+ if n > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent)
+ }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
+ if n > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent)
+ }
return n, err
}
@@ -535,6 +568,9 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o
}
file.EventUnregister(&w)
+ if total > 0 {
+ file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent)
+ }
return total, err
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go
index 8f3c22a02..945a364a7 100644
--- a/pkg/sentry/syscalls/linux/vfs2/splice.go
+++ b/pkg/sentry/syscalls/linux/vfs2/splice.go
@@ -187,6 +187,11 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
if n == 0 {
return 0, nil, err
}
+
+ // On Linux, inotify behavior is not very consistent with splice(2). We try
+ // our best to emulate Linux for very basic calls to splice, where for some
+ // reason, events are generated for output files, but not input files.
+ outFile.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent)
return uintptr(n), nil, nil
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
index 083fdcf82..ef8358b8a 100644
--- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go
+++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
@@ -116,9 +116,9 @@ func Override() {
s.Table[232] = syscalls.Supported("epoll_wait", EpollWait)
s.Table[233] = syscalls.Supported("epoll_ctl", EpollCtl)
s.Table[235] = syscalls.Supported("utimes", Utimes)
- delete(s.Table, 253) // inotify_init
- delete(s.Table, 254) // inotify_add_watch
- delete(s.Table, 255) // inotify_rm_watch
+ s.Table[253] = syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil)
+ s.Table[254] = syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil)
+ s.Table[255] = syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil)
s.Table[257] = syscalls.Supported("openat", Openat)
s.Table[258] = syscalls.Supported("mkdirat", Mkdirat)
s.Table[259] = syscalls.Supported("mknodat", Mknodat)
@@ -151,7 +151,7 @@ func Override() {
s.Table[291] = syscalls.Supported("epoll_create1", EpollCreate1)
s.Table[292] = syscalls.Supported("dup3", Dup3)
s.Table[293] = syscalls.Supported("pipe2", Pipe2)
- delete(s.Table, 294) // inotify_init1
+ s.Table[294] = syscalls.PartiallySupported("inotify_init1", InotifyInit1, "inotify events are only available inside the sandbox.", nil)
s.Table[295] = syscalls.Supported("preadv", Preadv)
s.Table[296] = syscalls.Supported("pwritev", Pwritev)
s.Table[299] = syscalls.Supported("recvmmsg", RecvMMsg)
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index 94d69c1cc..774cc66cc 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -15,6 +15,18 @@ go_template_instance(
},
)
+go_template_instance(
+ name = "event_list",
+ out = "event_list.go",
+ package = "vfs",
+ prefix = "event",
+ template = "//pkg/ilist:generic_list",
+ types = {
+ "Element": "*Event",
+ "Linker": "*Event",
+ },
+)
+
go_library(
name = "vfs",
srcs = [
@@ -25,11 +37,13 @@ go_library(
"device.go",
"epoll.go",
"epoll_interest_list.go",
+ "event_list.go",
"file_description.go",
"file_description_impl_util.go",
"filesystem.go",
"filesystem_impl_util.go",
"filesystem_type.go",
+ "inotify.go",
"mount.go",
"mount_unsafe.go",
"options.go",
@@ -57,6 +71,7 @@ go_library(
"//pkg/sentry/limits",
"//pkg/sentry/memmap",
"//pkg/sentry/socket/unix/transport",
+ "//pkg/sentry/uniqueid",
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md
index 9aa133bcb..66f3105bd 100644
--- a/pkg/sentry/vfs/README.md
+++ b/pkg/sentry/vfs/README.md
@@ -39,8 +39,8 @@ Mount references are held by:
- Mount: Each referenced Mount holds a reference on its parent, which is the
mount containing its mount point.
-- VirtualFilesystem: A reference is held on each Mount that has not been
- umounted.
+- VirtualFilesystem: A reference is held on each Mount that has been connected
+ to a mount point, but not yet umounted.
MountNamespace and FileDescription references are held by users of VFS. The
expectation is that each `kernel.Task` holds a reference on its corresponding
diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go
index caf770fd5..b7c6b60b8 100644
--- a/pkg/sentry/vfs/anonfs.go
+++ b/pkg/sentry/vfs/anonfs.go
@@ -297,3 +297,15 @@ func (d *anonDentry) TryIncRef() bool {
func (d *anonDentry) DecRef() {
// no-op
}
+
+// InotifyWithParent implements DentryImpl.InotifyWithParent.
+//
+// TODO(gvisor.dev/issue/1479): Implement inotify.
+func (d *anonDentry) InotifyWithParent(events uint32, cookie uint32, et EventType) {}
+
+// Watches implements DentryImpl.Watches.
+//
+// TODO(gvisor.dev/issue/1479): Implement inotify.
+func (d *anonDentry) Watches() *Watches {
+ return nil
+}
diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go
index 8624dbd5d..24af13eb1 100644
--- a/pkg/sentry/vfs/dentry.go
+++ b/pkg/sentry/vfs/dentry.go
@@ -103,6 +103,22 @@ type DentryImpl interface {
// DecRef decrements the Dentry's reference count.
DecRef()
+
+ // InotifyWithParent notifies all watches on the targets represented by this
+ // dentry and its parent. The parent's watches are notified first, followed
+ // by this dentry's.
+ //
+ // InotifyWithParent automatically adds the IN_ISDIR flag for dentries
+ // representing directories.
+ //
+ // Note that the events may not actually propagate up to the user, depending
+ // on the event masks.
+ InotifyWithParent(events uint32, cookie uint32, et EventType)
+
+ // Watches returns the set of inotify watches for the file corresponding to
+ // the Dentry. Dentries that are hard links to the same underlying file
+ // share the same watches.
+ Watches() *Watches
}
// IncRef increments d's reference count.
@@ -133,6 +149,17 @@ func (d *Dentry) isMounted() bool {
return atomic.LoadUint32(&d.mounts) != 0
}
+// InotifyWithParent notifies all watches on the inodes for this dentry and
+// its parent of events.
+func (d *Dentry) InotifyWithParent(events uint32, cookie uint32, et EventType) {
+ d.impl.InotifyWithParent(events, cookie, et)
+}
+
+// Watches returns the set of inotify watches associated with d.
+func (d *Dentry) Watches() *Watches {
+ return d.impl.Watches()
+}
+
// The following functions are exported so that filesystem implementations can
// use them. The vfs package, and users of VFS, should not call these
// functions.
diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go
new file mode 100644
index 000000000..05a3051a4
--- /dev/null
+++ b/pkg/sentry/vfs/inotify.go
@@ -0,0 +1,697 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs
+
+import (
+ "bytes"
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
+)
+
+// inotifyEventBaseSize is the base size of linux's struct inotify_event. This
+// must be a power 2 for rounding below.
+const inotifyEventBaseSize = 16
+
+// EventType defines different kinds of inotfiy events.
+//
+// The way events are labelled appears somewhat arbitrary, but they must match
+// Linux so that IN_EXCL_UNLINK behaves as it does in Linux.
+type EventType uint8
+
+// PathEvent and InodeEvent correspond to FSNOTIFY_EVENT_PATH and
+// FSNOTIFY_EVENT_INODE in Linux.
+const (
+ PathEvent EventType = iota
+ InodeEvent EventType = iota
+)
+
+// Inotify represents an inotify instance created by inotify_init(2) or
+// inotify_init1(2). Inotify implements FileDescriptionImpl.
+//
+// Lock ordering:
+// Inotify.mu -> Watches.mu -> Inotify.evMu
+//
+// +stateify savable
+type Inotify struct {
+ vfsfd FileDescription
+ FileDescriptionDefaultImpl
+ DentryMetadataFileDescriptionImpl
+
+ // Unique identifier for this inotify instance. We don't just reuse the
+ // inotify fd because fds can be duped. These should not be exposed to the
+ // user, since we may aggressively reuse an id on S/R.
+ id uint64
+
+ // queue is used to notify interested parties when the inotify instance
+ // becomes readable or writable.
+ queue waiter.Queue `state:"nosave"`
+
+ // evMu *only* protects the events list. We need a separate lock while
+ // queuing events: using mu may violate lock ordering, since at that point
+ // the calling goroutine may already hold Watches.mu.
+ evMu sync.Mutex `state:"nosave"`
+
+ // A list of pending events for this inotify instance. Protected by evMu.
+ events eventList
+
+ // A scratch buffer, used to serialize inotify events. Allocate this
+ // ahead of time for the sake of performance. Protected by evMu.
+ scratch []byte
+
+ // mu protects the fields below.
+ mu sync.Mutex `state:"nosave"`
+
+ // nextWatchMinusOne is used to allocate watch descriptors on this Inotify
+ // instance. Note that Linux starts numbering watch descriptors from 1.
+ nextWatchMinusOne int32
+
+ // Map from watch descriptors to watch objects.
+ watches map[int32]*Watch
+}
+
+var _ FileDescriptionImpl = (*Inotify)(nil)
+
+// NewInotifyFD constructs a new Inotify instance.
+func NewInotifyFD(ctx context.Context, vfsObj *VirtualFilesystem, flags uint32) (*FileDescription, error) {
+ // O_CLOEXEC affects file descriptors, so it must be handled outside of vfs.
+ flags &^= linux.O_CLOEXEC
+ if flags&^linux.O_NONBLOCK != 0 {
+ return nil, syserror.EINVAL
+ }
+
+ id := uniqueid.GlobalFromContext(ctx)
+ vd := vfsObj.NewAnonVirtualDentry(fmt.Sprintf("[inotifyfd:%d]", id))
+ defer vd.DecRef()
+ fd := &Inotify{
+ id: id,
+ scratch: make([]byte, inotifyEventBaseSize),
+ watches: make(map[int32]*Watch),
+ }
+ if err := fd.vfsfd.Init(fd, flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{
+ UseDentryMetadata: true,
+ DenyPRead: true,
+ DenyPWrite: true,
+ }); err != nil {
+ return nil, err
+ }
+ return &fd.vfsfd, nil
+}
+
+// Release implements FileDescriptionImpl.Release. Release removes all
+// watches and frees all resources for an inotify instance.
+func (i *Inotify) Release() {
+ // We need to hold i.mu to avoid a race with concurrent calls to
+ // Inotify.handleDeletion from Watches. There's no risk of Watches
+ // accessing this Inotify after the destructor ends, because we remove all
+ // references to it below.
+ i.mu.Lock()
+ defer i.mu.Unlock()
+ for _, w := range i.watches {
+ // Remove references to the watch from the watches set on the target. We
+ // don't need to worry about the references from i.watches, since this
+ // file description is about to be destroyed.
+ w.set.Remove(i.id)
+ }
+}
+
+// EventRegister implements waiter.Waitable.
+func (i *Inotify) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+ i.queue.EventRegister(e, mask)
+}
+
+// EventUnregister implements waiter.Waitable.
+func (i *Inotify) EventUnregister(e *waiter.Entry) {
+ i.queue.EventUnregister(e)
+}
+
+// Readiness implements waiter.Waitable.Readiness.
+//
+// Readiness indicates whether there are pending events for an inotify instance.
+func (i *Inotify) Readiness(mask waiter.EventMask) waiter.EventMask {
+ ready := waiter.EventMask(0)
+
+ i.evMu.Lock()
+ defer i.evMu.Unlock()
+
+ if !i.events.Empty() {
+ ready |= waiter.EventIn
+ }
+
+ return mask & ready
+}
+
+// PRead implements FileDescriptionImpl.
+func (*Inotify) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
+ return 0, syserror.ESPIPE
+}
+
+// PWrite implements FileDescriptionImpl.
+func (*Inotify) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) {
+ return 0, syserror.ESPIPE
+}
+
+// Write implements FileDescriptionImpl.Write.
+func (*Inotify) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// Read implements FileDescriptionImpl.Read.
+func (i *Inotify) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
+ if dst.NumBytes() < inotifyEventBaseSize {
+ return 0, syserror.EINVAL
+ }
+
+ i.evMu.Lock()
+ defer i.evMu.Unlock()
+
+ if i.events.Empty() {
+ // Nothing to read yet, tell caller to block.
+ return 0, syserror.ErrWouldBlock
+ }
+
+ var writeLen int64
+ for it := i.events.Front(); it != nil; {
+ // Advance `it` before the element is removed from the list, or else
+ // it.Next() will always be nil.
+ event := it
+ it = it.Next()
+
+ // Does the buffer have enough remaining space to hold the event we're
+ // about to write out?
+ if dst.NumBytes() < int64(event.sizeOf()) {
+ if writeLen > 0 {
+ // Buffer wasn't big enough for all pending events, but we did
+ // write some events out.
+ return writeLen, nil
+ }
+ return 0, syserror.EINVAL
+ }
+
+ // Linux always dequeues an available event as long as there's enough
+ // buffer space to copy it out, even if the copy below fails. Emulate
+ // this behaviour.
+ i.events.Remove(event)
+
+ // Buffer has enough space, copy event to the read buffer.
+ n, err := event.CopyTo(ctx, i.scratch, dst)
+ if err != nil {
+ return 0, err
+ }
+
+ writeLen += n
+ dst = dst.DropFirst64(n)
+ }
+ return writeLen, nil
+}
+
+// Ioctl implements fs.FileOperations.Ioctl.
+func (i *Inotify) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+ switch args[1].Int() {
+ case linux.FIONREAD:
+ i.evMu.Lock()
+ defer i.evMu.Unlock()
+ var n uint32
+ for e := i.events.Front(); e != nil; e = e.Next() {
+ n += uint32(e.sizeOf())
+ }
+ var buf [4]byte
+ usermem.ByteOrder.PutUint32(buf[:], n)
+ _, err := uio.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{})
+ return 0, err
+
+ default:
+ return 0, syserror.ENOTTY
+ }
+}
+
+func (i *Inotify) queueEvent(ev *Event) {
+ i.evMu.Lock()
+
+ // Check if we should coalesce the event we're about to queue with the last
+ // one currently in the queue. Events are coalesced if they are identical.
+ if last := i.events.Back(); last != nil {
+ if ev.equals(last) {
+ // "Coalesce" the two events by simply not queuing the new one. We
+ // don't need to raise a waiter.EventIn notification because no new
+ // data is available for reading.
+ i.evMu.Unlock()
+ return
+ }
+ }
+
+ i.events.PushBack(ev)
+
+ // Release mutex before notifying waiters because we don't control what they
+ // can do.
+ i.evMu.Unlock()
+
+ i.queue.Notify(waiter.EventIn)
+}
+
+// newWatchLocked creates and adds a new watch to target.
+//
+// Precondition: i.mu must be locked.
+func (i *Inotify) newWatchLocked(target *Dentry, mask uint32) *Watch {
+ targetWatches := target.Watches()
+ w := &Watch{
+ owner: i,
+ wd: i.nextWatchIDLocked(),
+ set: targetWatches,
+ mask: mask,
+ }
+
+ // Hold the watch in this inotify instance as well as the watch set on the
+ // target.
+ i.watches[w.wd] = w
+ targetWatches.Add(w)
+ return w
+}
+
+// newWatchIDLocked allocates and returns a new watch descriptor.
+//
+// Precondition: i.mu must be locked.
+func (i *Inotify) nextWatchIDLocked() int32 {
+ i.nextWatchMinusOne++
+ return i.nextWatchMinusOne
+}
+
+// handleDeletion handles the deletion of the target of watch w. It removes w
+// from i.watches and a watch removal event is generated.
+func (i *Inotify) handleDeletion(w *Watch) {
+ i.mu.Lock()
+ _, found := i.watches[w.wd]
+ delete(i.watches, w.wd)
+ i.mu.Unlock()
+
+ if found {
+ i.queueEvent(newEvent(w.wd, "", linux.IN_IGNORED, 0))
+ }
+}
+
+// AddWatch constructs a new inotify watch and adds it to the target. It
+// returns the watch descriptor returned by inotify_add_watch(2).
+func (i *Inotify) AddWatch(target *Dentry, mask uint32) int32 {
+ // Note: Locking this inotify instance protects the result returned by
+ // Lookup() below. With the lock held, we know for sure the lookup result
+ // won't become stale because it's impossible for *this* instance to
+ // add/remove watches on target.
+ i.mu.Lock()
+ defer i.mu.Unlock()
+
+ // Does the target already have a watch from this inotify instance?
+ if existing := target.Watches().Lookup(i.id); existing != nil {
+ newmask := mask
+ if mask&linux.IN_MASK_ADD != 0 {
+ // "Add (OR) events to watch mask for this pathname if it already
+ // exists (instead of replacing mask)." -- inotify(7)
+ newmask |= atomic.LoadUint32(&existing.mask)
+ }
+ atomic.StoreUint32(&existing.mask, newmask)
+ return existing.wd
+ }
+
+ // No existing watch, create a new watch.
+ w := i.newWatchLocked(target, mask)
+ return w.wd
+}
+
+// RmWatch looks up an inotify watch for the given 'wd' and configures the
+// target to stop sending events to this inotify instance.
+func (i *Inotify) RmWatch(wd int32) error {
+ i.mu.Lock()
+
+ // Find the watch we were asked to removed.
+ w, ok := i.watches[wd]
+ if !ok {
+ i.mu.Unlock()
+ return syserror.EINVAL
+ }
+
+ // Remove the watch from this instance.
+ delete(i.watches, wd)
+
+ // Remove the watch from the watch target.
+ w.set.Remove(w.OwnerID())
+ i.mu.Unlock()
+
+ // Generate the event for the removal.
+ i.queueEvent(newEvent(wd, "", linux.IN_IGNORED, 0))
+
+ return nil
+}
+
+// Watches is the collection of all inotify watches on a single file.
+//
+// +stateify savable
+type Watches struct {
+ // mu protects the fields below.
+ mu sync.RWMutex `state:"nosave"`
+
+ // ws is the map of active watches in this collection, keyed by the inotify
+ // instance id of the owner.
+ ws map[uint64]*Watch
+}
+
+// Lookup returns the watch owned by an inotify instance with the given id.
+// Returns nil if no such watch exists.
+//
+// Precondition: the inotify instance with the given id must be locked to
+// prevent the returned watch from being concurrently modified or replaced in
+// Inotify.watches.
+func (w *Watches) Lookup(id uint64) *Watch {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ return w.ws[id]
+}
+
+// Add adds watch into this set of watches.
+//
+// Precondition: the inotify instance with the given id must be locked.
+func (w *Watches) Add(watch *Watch) {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+
+ owner := watch.OwnerID()
+ // Sanity check, we should never have two watches for one owner on the
+ // same target.
+ if _, exists := w.ws[owner]; exists {
+ panic(fmt.Sprintf("Watch collision with ID %+v", owner))
+ }
+ if w.ws == nil {
+ w.ws = make(map[uint64]*Watch)
+ }
+ w.ws[owner] = watch
+}
+
+// Remove removes a watch with the given id from this set of watches and
+// releases it. The caller is responsible for generating any watch removal
+// event, as appropriate. The provided id must match an existing watch in this
+// collection.
+//
+// Precondition: the inotify instance with the given id must be locked.
+func (w *Watches) Remove(id uint64) {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+
+ if w.ws == nil {
+ // This watch set is being destroyed. The thread executing the
+ // destructor is already in the process of deleting all our watches. We
+ // got here with no references on the target because we raced with the
+ // destructor notifying all the watch owners of destruction. See the
+ // comment in Watches.HandleDeletion for why this race exists.
+ return
+ }
+
+ if _, ok := w.ws[id]; !ok {
+ // While there's technically no problem with silently ignoring a missing
+ // watch, this is almost certainly a bug.
+ panic(fmt.Sprintf("Attempt to remove a watch, but no watch found with provided id %+v.", id))
+ }
+ delete(w.ws, id)
+}
+
+// Notify queues a new event with all watches in this set.
+func (w *Watches) Notify(name string, events, cookie uint32, et EventType) {
+ w.NotifyWithExclusions(name, events, cookie, et, false)
+}
+
+// NotifyWithExclusions queues a new event with watches in this set. Watches
+// with IN_EXCL_UNLINK are skipped if the event is coming from a child that
+// has been unlinked.
+func (w *Watches) NotifyWithExclusions(name string, events, cookie uint32, et EventType, unlinked bool) {
+ // N.B. We don't defer the unlocks because Notify is in the hot path of
+ // all IO operations, and the defer costs too much for small IO
+ // operations.
+ w.mu.RLock()
+ for _, watch := range w.ws {
+ if unlinked && watch.ExcludeUnlinkedChildren() && et == PathEvent {
+ continue
+ }
+ watch.Notify(name, events, cookie)
+ }
+ w.mu.RUnlock()
+}
+
+// HandleDeletion is called when the watch target is destroyed to emit
+// the appropriate events.
+func (w *Watches) HandleDeletion() {
+ w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent)
+
+ // TODO(gvisor.dev/issue/1479): This doesn't work because maps are not copied
+ // by value. Ideally, we wouldn't have this circular locking so we can just
+ // notify of IN_DELETE_SELF in the same loop below.
+ //
+ // We can't hold w.mu while calling watch.handleDeletion to preserve lock
+ // ordering w.r.t to the owner inotify instances. Instead, atomically move
+ // the watches map into a local variable so we can iterate over it safely.
+ //
+ // Because of this however, it is possible for the watches' owners to reach
+ // this inode while the inode has no refs. This is still safe because the
+ // owners can only reach the inode until this function finishes calling
+ // watch.handleDeletion below and the inode is guaranteed to exist in the
+ // meantime. But we still have to be very careful not to rely on inode state
+ // that may have been already destroyed.
+ var ws map[uint64]*Watch
+ w.mu.Lock()
+ ws = w.ws
+ w.ws = nil
+ w.mu.Unlock()
+
+ for _, watch := range ws {
+ // TODO(gvisor.dev/issue/1479): consider refactoring this.
+ watch.handleDeletion()
+ }
+}
+
+// Watch represent a particular inotify watch created by inotify_add_watch.
+//
+// +stateify savable
+type Watch struct {
+ // Inotify instance which owns this watch.
+ owner *Inotify
+
+ // Descriptor for this watch. This is unique across an inotify instance.
+ wd int32
+
+ // set is the watch set containing this watch. It belongs to the target file
+ // of this watch.
+ set *Watches
+
+ // Events being monitored via this watch. Must be accessed with atomic
+ // memory operations.
+ mask uint32
+}
+
+// OwnerID returns the id of the inotify instance that owns this watch.
+func (w *Watch) OwnerID() uint64 {
+ return w.owner.id
+}
+
+// ExcludeUnlinkedChildren indicates whether the watched object should continue
+// to be notified of events of its children after they have been unlinked, e.g.
+// for an open file descriptor.
+//
+// TODO(gvisor.dev/issue/1479): Implement IN_EXCL_UNLINK.
+// We can do this by keeping track of the set of unlinked children in Watches
+// to skip notification.
+func (w *Watch) ExcludeUnlinkedChildren() bool {
+ return atomic.LoadUint32(&w.mask)&linux.IN_EXCL_UNLINK != 0
+}
+
+// Notify queues a new event on this watch.
+func (w *Watch) Notify(name string, events uint32, cookie uint32) {
+ mask := atomic.LoadUint32(&w.mask)
+ if mask&events == 0 {
+ // We weren't watching for this event.
+ return
+ }
+
+ // Event mask should include bits matched from the watch plus all control
+ // event bits.
+ unmaskableBits := ^uint32(0) &^ linux.IN_ALL_EVENTS
+ effectiveMask := unmaskableBits | mask
+ matchedEvents := effectiveMask & events
+ w.owner.queueEvent(newEvent(w.wd, name, matchedEvents, cookie))
+}
+
+// handleDeletion handles the deletion of w's target.
+func (w *Watch) handleDeletion() {
+ w.owner.handleDeletion(w)
+}
+
+// Event represents a struct inotify_event from linux.
+//
+// +stateify savable
+type Event struct {
+ eventEntry
+
+ wd int32
+ mask uint32
+ cookie uint32
+
+ // len is computed based on the name field is set automatically by
+ // Event.setName. It should be 0 when no name is set; otherwise it is the
+ // length of the name slice.
+ len uint32
+
+ // The name field has special padding requirements and should only be set by
+ // calling Event.setName.
+ name []byte
+}
+
+func newEvent(wd int32, name string, events, cookie uint32) *Event {
+ e := &Event{
+ wd: wd,
+ mask: events,
+ cookie: cookie,
+ }
+ if name != "" {
+ e.setName(name)
+ }
+ return e
+}
+
+// paddedBytes converts a go string to a null-terminated c-string, padded with
+// null bytes to a total size of 'l'. 'l' must be large enough for all the bytes
+// in the 's' plus at least one null byte.
+func paddedBytes(s string, l uint32) []byte {
+ if l < uint32(len(s)+1) {
+ panic("Converting string to byte array results in truncation, this can lead to buffer-overflow due to the missing null-byte!")
+ }
+ b := make([]byte, l)
+ copy(b, s)
+
+ // b was zero-value initialized during make(), so the rest of the slice is
+ // already filled with null bytes.
+
+ return b
+}
+
+// setName sets the optional name for this event.
+func (e *Event) setName(name string) {
+ // We need to pad the name such that the entire event length ends up a
+ // multiple of inotifyEventBaseSize.
+ unpaddedLen := len(name) + 1
+ // Round up to nearest multiple of inotifyEventBaseSize.
+ e.len = uint32((unpaddedLen + inotifyEventBaseSize - 1) & ^(inotifyEventBaseSize - 1))
+ // Make sure we haven't overflowed and wrapped around when rounding.
+ if unpaddedLen > int(e.len) {
+ panic("Overflow when rounding inotify event size, the 'name' field was too big.")
+ }
+ e.name = paddedBytes(name, e.len)
+}
+
+func (e *Event) sizeOf() int {
+ s := inotifyEventBaseSize + int(e.len)
+ if s < inotifyEventBaseSize {
+ panic("overflow")
+ }
+ return s
+}
+
+// CopyTo serializes this event to dst. buf is used as a scratch buffer to
+// construct the output. We use a buffer allocated ahead of time for
+// performance. buf must be at least inotifyEventBaseSize bytes.
+func (e *Event) CopyTo(ctx context.Context, buf []byte, dst usermem.IOSequence) (int64, error) {
+ usermem.ByteOrder.PutUint32(buf[0:], uint32(e.wd))
+ usermem.ByteOrder.PutUint32(buf[4:], e.mask)
+ usermem.ByteOrder.PutUint32(buf[8:], e.cookie)
+ usermem.ByteOrder.PutUint32(buf[12:], e.len)
+
+ writeLen := 0
+
+ n, err := dst.CopyOut(ctx, buf)
+ if err != nil {
+ return 0, err
+ }
+ writeLen += n
+ dst = dst.DropFirst(n)
+
+ if e.len > 0 {
+ n, err = dst.CopyOut(ctx, e.name)
+ if err != nil {
+ return 0, err
+ }
+ writeLen += n
+ }
+
+ // Santiy check.
+ if writeLen != e.sizeOf() {
+ panic(fmt.Sprintf("Serialized unexpected amount of data for an event, expected %d, wrote %d.", e.sizeOf(), writeLen))
+ }
+
+ return int64(writeLen), nil
+}
+
+func (e *Event) equals(other *Event) bool {
+ return e.wd == other.wd &&
+ e.mask == other.mask &&
+ e.cookie == other.cookie &&
+ e.len == other.len &&
+ bytes.Equal(e.name, other.name)
+}
+
+// InotifyEventFromStatMask generates the appropriate events for an operation
+// that set the stats specified in mask.
+func InotifyEventFromStatMask(mask uint32) uint32 {
+ var ev uint32
+ if mask&(linux.STATX_UID|linux.STATX_GID|linux.STATX_MODE) != 0 {
+ ev |= linux.IN_ATTRIB
+ }
+ if mask&linux.STATX_SIZE != 0 {
+ ev |= linux.IN_MODIFY
+ }
+
+ if (mask & (linux.STATX_ATIME | linux.STATX_MTIME)) == (linux.STATX_ATIME | linux.STATX_MTIME) {
+ // Both times indicates a utime(s) call.
+ ev |= linux.IN_ATTRIB
+ } else if mask&linux.STATX_ATIME != 0 {
+ ev |= linux.IN_ACCESS
+ } else if mask&linux.STATX_MTIME != 0 {
+ mask |= linux.IN_MODIFY
+ }
+ return ev
+}
+
+// InotifyRemoveChild sends the appriopriate notifications to the watch sets of
+// the child being removed and its parent.
+func InotifyRemoveChild(self, parent *Watches, name string) {
+ self.Notify("", linux.IN_ATTRIB, 0, InodeEvent)
+ parent.Notify(name, linux.IN_DELETE, 0, InodeEvent)
+ // TODO(gvisor.dev/issue/1479): implement IN_EXCL_UNLINK.
+}
+
+// InotifyRename sends the appriopriate notifications to the watch sets of the
+// file being renamed and its old/new parents.
+func InotifyRename(ctx context.Context, renamed, oldParent, newParent *Watches, oldName, newName string, isDir bool) {
+ var dirEv uint32
+ if isDir {
+ dirEv = linux.IN_ISDIR
+ }
+ cookie := uniqueid.InotifyCookie(ctx)
+ oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent)
+ newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent)
+ // Somewhat surprisingly, self move events do not have a cookie.
+ renamed.Notify("", linux.IN_MOVE_SELF, 0, InodeEvent)
+}
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index 02850b65c..3adb7c97d 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -28,9 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/syserror"
)
-// lastMountID is used to allocate mount ids. Must be accessed atomically.
-var lastMountID uint64
-
// A Mount is a replacement of a Dentry (Mount.key.point) from one Filesystem
// (Mount.key.parent.fs) with a Dentry (Mount.root) from another Filesystem
// (Mount.fs), which applies to path resolution in the context of a particular
@@ -97,7 +94,7 @@ type Mount struct {
func newMount(vfs *VirtualFilesystem, fs *Filesystem, root *Dentry, mntns *MountNamespace, opts *MountOptions) *Mount {
mnt := &Mount{
- ID: atomic.AddUint64(&lastMountID, 1),
+ ID: atomic.AddUint64(&vfs.lastMountID, 1),
vfs: vfs,
fs: fs,
root: root,
@@ -111,6 +108,16 @@ func newMount(vfs *VirtualFilesystem, fs *Filesystem, root *Dentry, mntns *Mount
return mnt
}
+// Options returns a copy of the MountOptions currently applicable to mnt.
+func (mnt *Mount) Options() MountOptions {
+ mnt.vfs.mountMu.Lock()
+ defer mnt.vfs.mountMu.Unlock()
+ return MountOptions{
+ Flags: mnt.flags,
+ ReadOnly: mnt.readOnly(),
+ }
+}
+
// A MountNamespace is a collection of Mounts.
//
// MountNamespaces are reference-counted. Unless otherwise specified, all
@@ -148,7 +155,7 @@ type MountNamespace struct {
func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth.Credentials, source, fsTypeName string, opts *GetFilesystemOptions) (*MountNamespace, error) {
rft := vfs.getFilesystemType(fsTypeName)
if rft == nil {
- ctx.Warningf("Unknown filesystem: %s", fsTypeName)
+ ctx.Warningf("Unknown filesystem type: %s", fsTypeName)
return nil, syserror.ENODEV
}
fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, *opts)
@@ -175,26 +182,34 @@ func (vfs *VirtualFilesystem) NewDisconnectedMount(fs *Filesystem, root *Dentry,
return newMount(vfs, fs, root, nil /* mntns */, opts), nil
}
-// MountAt creates and mounts a Filesystem configured by the given arguments.
-func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *MountOptions) error {
+// MountDisconnected creates a Filesystem configured by the given arguments,
+// then returns a Mount representing it. The new Mount is not associated with
+// any MountNamespace and is not connected to any other Mounts.
+func (vfs *VirtualFilesystem) MountDisconnected(ctx context.Context, creds *auth.Credentials, source string, fsTypeName string, opts *MountOptions) (*Mount, error) {
rft := vfs.getFilesystemType(fsTypeName)
if rft == nil {
- return syserror.ENODEV
+ return nil, syserror.ENODEV
}
if !opts.InternalMount && !rft.opts.AllowUserMount {
- return syserror.ENODEV
+ return nil, syserror.ENODEV
}
fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, opts.GetFilesystemOptions)
if err != nil {
- return err
+ return nil, err
}
+ defer root.DecRef()
+ defer fs.DecRef()
+ return vfs.NewDisconnectedMount(fs, root, opts)
+}
+// ConnectMountAt connects mnt at the path represented by target.
+//
+// Preconditions: mnt must be disconnected.
+func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Credentials, mnt *Mount, target *PathOperation) error {
// We can't hold vfs.mountMu while calling FilesystemImpl methods due to
// lock ordering.
vd, err := vfs.GetDentryAt(ctx, creds, target, &GetDentryOptions{})
if err != nil {
- root.DecRef()
- fs.DecRef()
return err
}
vfs.mountMu.Lock()
@@ -204,8 +219,6 @@ func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentia
vd.dentry.mu.Unlock()
vfs.mountMu.Unlock()
vd.DecRef()
- root.DecRef()
- fs.DecRef()
return syserror.ENOENT
}
// vd might have been mounted over between vfs.GetDentryAt() and
@@ -238,7 +251,6 @@ func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentia
// point and the mount root are directories, or neither are, and returns
// ENOTDIR if this is not the case.
mntns := vd.mount.ns
- mnt := newMount(vfs, fs, root, mntns, opts)
vfs.mounts.seq.BeginWrite()
vfs.connectLocked(mnt, vd, mntns)
vfs.mounts.seq.EndWrite()
@@ -247,6 +259,19 @@ func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentia
return nil
}
+// MountAt creates and mounts a Filesystem configured by the given arguments.
+func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *MountOptions) error {
+ mnt, err := vfs.MountDisconnected(ctx, creds, source, fsTypeName, opts)
+ if err != nil {
+ return err
+ }
+ defer mnt.DecRef()
+ if err := vfs.ConnectMountAt(ctx, creds, mnt, target); err != nil {
+ return err
+ }
+ return nil
+}
+
// UmountAt removes the Mount at the given path.
func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *UmountOptions) error {
if opts.Flags&^(linux.MNT_FORCE|linux.MNT_DETACH) != 0 {
@@ -369,14 +394,22 @@ func (vfs *VirtualFilesystem) umountRecursiveLocked(mnt *Mount, opts *umountRecu
// references held by vd.
//
// Preconditions: vfs.mountMu must be locked. vfs.mounts.seq must be in a
-// writer critical section. d.mu must be locked. mnt.parent() == nil.
+// writer critical section. d.mu must be locked. mnt.parent() == nil, i.e. mnt
+// must not already be connected.
func (vfs *VirtualFilesystem) connectLocked(mnt *Mount, vd VirtualDentry, mntns *MountNamespace) {
+ if checkInvariants {
+ if mnt.parent() != nil {
+ panic("VFS.connectLocked called on connected mount")
+ }
+ }
+ mnt.IncRef() // dropped by callers of umountRecursiveLocked
mnt.storeKey(vd)
if vd.mount.children == nil {
vd.mount.children = make(map[*Mount]struct{})
}
vd.mount.children[mnt] = struct{}{}
atomic.AddUint32(&vd.dentry.mounts, 1)
+ mnt.ns = mntns
mntns.mountpoints[vd.dentry]++
vfs.mounts.insertSeqed(mnt)
vfsmpmounts, ok := vfs.mountpoints[vd.dentry]
@@ -394,6 +427,11 @@ func (vfs *VirtualFilesystem) connectLocked(mnt *Mount, vd VirtualDentry, mntns
// writer critical section. mnt.parent() != nil.
func (vfs *VirtualFilesystem) disconnectLocked(mnt *Mount) VirtualDentry {
vd := mnt.loadKey()
+ if checkInvariants {
+ if vd.mount != nil {
+ panic("VFS.disconnectLocked called on disconnected mount")
+ }
+ }
mnt.storeKey(VirtualDentry{})
delete(vd.mount.children, mnt)
atomic.AddUint32(&vd.dentry.mounts, math.MaxUint32) // -1
diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go
index bc7581698..70f850ca4 100644
--- a/pkg/sentry/vfs/mount_unsafe.go
+++ b/pkg/sentry/vfs/mount_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.12
-// +build !go1.15
+// +build !go1.16
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index 8d7f8f8af..52643a7c5 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -82,6 +82,10 @@ type VirtualFilesystem struct {
// mountpoints is analogous to Linux's mountpoint_hashtable.
mountpoints map[*Dentry]map[*Mount]struct{}
+ // lastMountID is the last allocated mount ID. lastMountID is accessed
+ // using atomic memory operations.
+ lastMountID uint64
+
// anonMount is a Mount, not included in mounts or mountpoints,
// representing an anonFilesystem. anonMount is used to back
// VirtualDentries returned by VirtualFilesystem.NewAnonVirtualDentry().
@@ -418,6 +422,7 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential
}
}
+ fd.Dentry().InotifyWithParent(linux.IN_OPEN, 0, PathEvent)
return fd, nil
}
if !rp.handleError(err) {
diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go
index 101497ed6..e2894f9f5 100644
--- a/pkg/sentry/watchdog/watchdog.go
+++ b/pkg/sentry/watchdog/watchdog.go
@@ -77,7 +77,10 @@ var DefaultOpts = Opts{
// trigger it.
const descheduleThreshold = 1 * time.Second
-var stuckTasks = metric.MustCreateNewUint64Metric("/watchdog/stuck_tasks_detected", true /* sync */, "Cumulative count of stuck tasks detected")
+var (
+ stuckStartup = metric.MustCreateNewUint64Metric("/watchdog/stuck_startup_detected", true /* sync */, "Incremented once on startup watchdog timeout")
+ stuckTasks = metric.MustCreateNewUint64Metric("/watchdog/stuck_tasks_detected", true /* sync */, "Cumulative count of stuck tasks detected")
+)
// Amount of time to wait before dumping the stack to the log again when the same task(s) remains stuck.
var stackDumpSameTaskPeriod = time.Minute
@@ -220,6 +223,9 @@ func (w *Watchdog) waitForStart() {
// We are fine.
return
}
+
+ stuckStartup.Increment()
+
var buf bytes.Buffer
buf.WriteString(fmt.Sprintf("Watchdog.Start() not called within %s", w.StartupTimeout))
w.doAction(w.StartupTimeoutAction, false, &buf)
@@ -328,8 +334,8 @@ func (w *Watchdog) reportStuckWatchdog() {
}
// doAction will take the given action. If the action is LogWarning, the stack
-// is not always dumpped to the log to prevent log flooding. "forceStack"
-// guarantees that the stack will be dumped regarless.
+// is not always dumped to the log to prevent log flooding. "forceStack"
+// guarantees that the stack will be dumped regardless.
func (w *Watchdog) doAction(action Action, forceStack bool, msg *bytes.Buffer) {
switch action {
case LogWarning:
diff --git a/pkg/sleep/sleep_unsafe.go b/pkg/sleep/sleep_unsafe.go
index 65bfcf778..f68c12620 100644
--- a/pkg/sleep/sleep_unsafe.go
+++ b/pkg/sleep/sleep_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.11
-// +build !go1.15
+// +build !go1.16
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/sync/memmove_unsafe.go b/pkg/sync/memmove_unsafe.go
index ad4a3a37e..1d7780695 100644
--- a/pkg/sync/memmove_unsafe.go
+++ b/pkg/sync/memmove_unsafe.go
@@ -4,7 +4,7 @@
// license that can be found in the LICENSE file.
// +build go1.12
-// +build !go1.15
+// +build !go1.16
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/sync/mutex_unsafe.go b/pkg/sync/mutex_unsafe.go
index 3dd15578b..dc034d561 100644
--- a/pkg/sync/mutex_unsafe.go
+++ b/pkg/sync/mutex_unsafe.go
@@ -4,7 +4,7 @@
// license that can be found in the LICENSE file.
// +build go1.13
-// +build !go1.15
+// +build !go1.16
// When updating the build constraint (above), check that syncMutex matches the
// standard library sync.Mutex definition.
diff --git a/pkg/sync/rwmutex_unsafe.go b/pkg/sync/rwmutex_unsafe.go
index ea6cdc447..995c0346e 100644
--- a/pkg/sync/rwmutex_unsafe.go
+++ b/pkg/sync/rwmutex_unsafe.go
@@ -4,7 +4,7 @@
// license that can be found in the LICENSE file.
// +build go1.13
-// +build !go1.15
+// +build !go1.16
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/syncevent/waiter_unsafe.go b/pkg/syncevent/waiter_unsafe.go
index 112e0e604..ad271e1a0 100644
--- a/pkg/syncevent/waiter_unsafe.go
+++ b/pkg/syncevent/waiter_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.11
-// +build !go1.15
+// +build !go1.16
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go
index 6e0db2741..d82ed5205 100644
--- a/pkg/tcpip/adapters/gonet/gonet.go
+++ b/pkg/tcpip/adapters/gonet/gonet.go
@@ -335,6 +335,11 @@ func (c *TCPConn) Read(b []byte) (int, error) {
deadline := c.readCancel()
numRead := 0
+ defer func() {
+ if numRead != 0 {
+ c.ep.ModerateRecvBuf(numRead)
+ }
+ }()
for numRead != len(b) {
if len(c.read) == 0 {
var err error
diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go
index 9bf67686d..20b183da0 100644
--- a/pkg/tcpip/link/channel/channel.go
+++ b/pkg/tcpip/link/channel/channel.go
@@ -181,13 +181,13 @@ func (e *Endpoint) NumQueued() int {
}
// InjectInbound injects an inbound packet.
-func (e *Endpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+func (e *Endpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
e.InjectLinkAddr(protocol, "", pkt)
}
// InjectLinkAddr injects an inbound packet with a remote link address.
-func (e *Endpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, pkt stack.PacketBuffer) {
- e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, pkt)
+func (e *Endpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, pkt *stack.PacketBuffer) {
+ e.dispatcher.DeliverNetworkPacket(remote, "" /* local */, protocol, pkt)
}
// Attach saves the stack network-layer dispatcher for use later when packets
@@ -229,13 +229,13 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress {
}
// WritePacket stores outbound packets into the channel.
-func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
// Clone r then release its resource so we only get the relevant fields from
// stack.Route without holding a reference to a NIC's endpoint.
route := r.Clone()
route.Release()
p := PacketInfo{
- Pkt: &pkt,
+ Pkt: pkt,
Proto: protocol,
GSO: gso,
Route: route,
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index affa1bbdf..f34082e1a 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -387,7 +387,7 @@ const (
// WritePacket writes outbound packets to the file descriptor. If it is not
// currently writable, the packet is dropped.
-func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
if e.hdrSize > 0 {
// Add ethernet header if needed.
eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize))
@@ -641,8 +641,8 @@ func (e *InjectableEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
}
// InjectInbound injects an inbound packet.
-func (e *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
- e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, pkt)
+func (e *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+ e.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, protocol, pkt)
}
// NewInjectable creates a new fd-based InjectableEndpoint.
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
index 3bfb15a8e..eaee7e5d7 100644
--- a/pkg/tcpip/link/fdbased/endpoint_test.go
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -45,7 +45,7 @@ const (
type packetInfo struct {
raddr tcpip.LinkAddress
proto tcpip.NetworkProtocolNumber
- contents stack.PacketBuffer
+ contents *stack.PacketBuffer
}
type context struct {
@@ -103,7 +103,7 @@ func (c *context) cleanup() {
}
}
-func (c *context) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote tcpip.LinkAddress, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+func (c *context) DeliverNetworkPacket(remote tcpip.LinkAddress, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
c.ch <- packetInfo{remote, protocol, pkt}
}
@@ -179,7 +179,7 @@ func testWritePacket(t *testing.T, plen int, eth bool, gsoMaxSize uint32, hash u
L3HdrLen: header.IPv4MaximumHeaderSize,
}
}
- if err := c.ep.WritePacket(r, gso, proto, stack.PacketBuffer{
+ if err := c.ep.WritePacket(r, gso, proto, &stack.PacketBuffer{
Header: hdr,
Data: payload.ToVectorisedView(),
Hash: hash,
@@ -295,7 +295,7 @@ func TestPreserveSrcAddress(t *testing.T) {
// WritePacket panics given a prependable with anything less than
// the minimum size of the ethernet header.
hdr := buffer.NewPrependable(header.EthernetMinimumSize)
- if err := c.ep.WritePacket(r, nil /* gso */, proto, stack.PacketBuffer{
+ if err := c.ep.WritePacket(r, nil /* gso */, proto, &stack.PacketBuffer{
Header: hdr,
Data: buffer.VectorisedView{},
}); err != nil {
@@ -358,7 +358,7 @@ func TestDeliverPacket(t *testing.T) {
want := packetInfo{
raddr: raddr,
proto: proto,
- contents: stack.PacketBuffer{
+ contents: &stack.PacketBuffer{
Data: buffer.View(b).ToVectorisedView(),
LinkHeader: buffer.View(hdr),
},
diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go
index fe2bf3b0b..2dfd29aa9 100644
--- a/pkg/tcpip/link/fdbased/mmap.go
+++ b/pkg/tcpip/link/fdbased/mmap.go
@@ -191,7 +191,7 @@ func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) {
}
pkt = pkt[d.e.hdrSize:]
- d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, stack.PacketBuffer{
+ d.e.dispatcher.DeliverNetworkPacket(remote, local, p, &stack.PacketBuffer{
Data: buffer.View(pkt).ToVectorisedView(),
LinkHeader: buffer.View(eth),
})
diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go
index cb4cbea69..f04738cfb 100644
--- a/pkg/tcpip/link/fdbased/packet_dispatchers.go
+++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go
@@ -139,13 +139,13 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
}
used := d.capViews(n, BufConfig)
- pkt := stack.PacketBuffer{
+ pkt := &stack.PacketBuffer{
Data: buffer.NewVectorisedView(n, append([]buffer.View(nil), d.views[:used]...)),
LinkHeader: buffer.View(eth),
}
pkt.Data.TrimFront(d.e.hdrSize)
- d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, pkt)
+ d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt)
// Prepare e.views for another packet: release used views.
for i := 0; i < used; i++ {
@@ -169,7 +169,7 @@ type recvMMsgDispatcher struct {
// iovecs is an array of array of iovec records where each iovec base
// pointer and length are initialzed to the corresponding view above,
- // except when GSO is neabled then the first iovec in each array of
+ // except when GSO is enabled then the first iovec in each array of
// iovecs points to a buffer for the vnet header which is stripped
// before the views are passed up the stack for further processing.
iovecs [][]syscall.Iovec
@@ -296,12 +296,12 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
}
used := d.capViews(k, int(n), BufConfig)
- pkt := stack.PacketBuffer{
+ pkt := &stack.PacketBuffer{
Data: buffer.NewVectorisedView(int(n), append([]buffer.View(nil), d.views[k][:used]...)),
LinkHeader: buffer.View(eth),
}
pkt.Data.TrimFront(d.e.hdrSize)
- d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, pkt)
+ d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt)
// Prepare e.views for another packet: release used views.
for i := 0; i < used; i++ {
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index 073c84ef9..568c6874f 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -76,7 +76,7 @@ func (*endpoint) Wait() {}
// WritePacket implements stack.LinkEndpoint.WritePacket. It delivers outbound
// packets to the network-layer dispatcher.
-func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
views := make([]buffer.View, 1, 1+len(pkt.Data.Views()))
views[0] = pkt.Header.View()
views = append(views, pkt.Data.Views()...)
@@ -84,7 +84,7 @@ func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.Netw
// Because we're immediately turning around and writing the packet back
// to the rx path, we intentionally don't preserve the remote and local
// link addresses from the stack.Route we're passed.
- e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, stack.PacketBuffer{
+ e.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, protocol, &stack.PacketBuffer{
Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views),
})
@@ -106,7 +106,7 @@ func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
}
linkHeader := header.Ethernet(hdr)
vv.TrimFront(len(linkHeader))
- e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, linkHeader.Type(), stack.PacketBuffer{
+ e.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, linkHeader.Type(), &stack.PacketBuffer{
Data: vv,
LinkHeader: buffer.View(linkHeader),
})
diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go
index a5478ce17..c69d6b7e9 100644
--- a/pkg/tcpip/link/muxed/injectable.go
+++ b/pkg/tcpip/link/muxed/injectable.go
@@ -80,8 +80,8 @@ func (m *InjectableEndpoint) IsAttached() bool {
}
// InjectInbound implements stack.InjectableLinkEndpoint.
-func (m *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
- m.dispatcher.DeliverNetworkPacket(m, "" /* remote */, "" /* local */, protocol, pkt)
+func (m *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+ m.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, protocol, pkt)
}
// WritePackets writes outbound packets to the appropriate
@@ -98,7 +98,7 @@ func (m *InjectableEndpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts s
// WritePacket writes outbound packets to the appropriate LinkInjectableEndpoint
// based on the RemoteAddress. HandleLocal only works if r.RemoteAddress has a
// route registered in this endpoint.
-func (m *InjectableEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+func (m *InjectableEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
if endpoint, ok := m.routes[r.RemoteAddress]; ok {
return endpoint.WritePacket(r, gso, protocol, pkt)
}
diff --git a/pkg/tcpip/link/muxed/injectable_test.go b/pkg/tcpip/link/muxed/injectable_test.go
index 87c734c1f..0744f66d6 100644
--- a/pkg/tcpip/link/muxed/injectable_test.go
+++ b/pkg/tcpip/link/muxed/injectable_test.go
@@ -50,7 +50,7 @@ func TestInjectableEndpointDispatch(t *testing.T) {
hdr.Prepend(1)[0] = 0xFA
packetRoute := stack.Route{RemoteAddress: dstIP}
- endpoint.WritePacket(&packetRoute, nil /* gso */, ipv4.ProtocolNumber, stack.PacketBuffer{
+ endpoint.WritePacket(&packetRoute, nil /* gso */, ipv4.ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buffer.NewViewFromBytes([]byte{0xFB}).ToVectorisedView(),
})
@@ -70,7 +70,7 @@ func TestInjectableEndpointDispatchHdrOnly(t *testing.T) {
hdr := buffer.NewPrependable(1)
hdr.Prepend(1)[0] = 0xFA
packetRoute := stack.Route{RemoteAddress: dstIP}
- endpoint.WritePacket(&packetRoute, nil /* gso */, ipv4.ProtocolNumber, stack.PacketBuffer{
+ endpoint.WritePacket(&packetRoute, nil /* gso */, ipv4.ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buffer.NewView(0).ToVectorisedView(),
})
diff --git a/pkg/tcpip/link/qdisc/fifo/endpoint.go b/pkg/tcpip/link/qdisc/fifo/endpoint.go
index 54432194d..b5dfb7850 100644
--- a/pkg/tcpip/link/qdisc/fifo/endpoint.go
+++ b/pkg/tcpip/link/qdisc/fifo/endpoint.go
@@ -102,8 +102,8 @@ func (q *queueDispatcher) dispatchLoop() {
}
// DeliverNetworkPacket implements stack.NetworkDispatcher.DeliverNetworkPacket.
-func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
- e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, pkt)
+func (e *endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+ e.dispatcher.DeliverNetworkPacket(remote, local, protocol, pkt)
}
// Attach implements stack.LinkEndpoint.Attach.
@@ -146,7 +146,7 @@ func (e *endpoint) GSOMaxSize() uint32 {
}
// WritePacket implements stack.LinkEndpoint.WritePacket.
-func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
// WritePacket caller's do not set the following fields in PacketBuffer
// so we populate them here.
newRoute := r.Clone()
@@ -154,7 +154,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne
pkt.GSOOptions = gso
pkt.NetworkProtocolNumber = protocol
d := e.dispatchers[int(pkt.Hash)%len(e.dispatchers)]
- if !d.q.enqueue(&pkt) {
+ if !d.q.enqueue(pkt) {
return tcpip.ErrNoBufferSpace
}
d.newPacketWaker.Assert()
diff --git a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
index 0b5a6cf49..99313ee25 100644
--- a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
+++ b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
@@ -14,7 +14,7 @@
// +build linux,amd64 linux,arm64
// +build go1.12
-// +build !go1.15
+// +build !go1.16
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index 0796d717e..0374a2441 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -185,7 +185,7 @@ func (e *endpoint) LinkAddress() tcpip.LinkAddress {
// WritePacket writes outbound packets to the file descriptor. If it is not
// currently writable, the packet is dropped.
-func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
// Add the ethernet header here.
eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize))
pkt.LinkHeader = buffer.View(eth)
@@ -275,7 +275,7 @@ func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) {
// Send packet up the stack.
eth := header.Ethernet(b[:header.EthernetMinimumSize])
- d.DeliverNetworkPacket(e, eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), stack.PacketBuffer{
+ d.DeliverNetworkPacket(eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), &stack.PacketBuffer{
Data: buffer.View(b[header.EthernetMinimumSize:]).ToVectorisedView(),
LinkHeader: buffer.View(eth),
})
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index 33f640b85..28a2e88ba 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -131,7 +131,7 @@ func newTestContext(t *testing.T, mtu, bufferSize uint32, addr tcpip.LinkAddress
return c
}
-func (c *testContext) DeliverNetworkPacket(_ stack.LinkEndpoint, remoteLinkAddr, localLinkAddr tcpip.LinkAddress, proto tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+func (c *testContext) DeliverNetworkPacket(remoteLinkAddr, localLinkAddr tcpip.LinkAddress, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
c.mu.Lock()
c.packets = append(c.packets, packetInfo{
addr: remoteLinkAddr,
@@ -273,7 +273,7 @@ func TestSimpleSend(t *testing.T) {
randomFill(buf)
proto := tcpip.NetworkProtocolNumber(rand.Intn(0x10000))
- if err := c.ep.WritePacket(&r, nil /* gso */, proto, stack.PacketBuffer{
+ if err := c.ep.WritePacket(&r, nil /* gso */, proto, &stack.PacketBuffer{
Header: hdr,
Data: buf.ToVectorisedView(),
}); err != nil {
@@ -345,7 +345,7 @@ func TestPreserveSrcAddressInSend(t *testing.T) {
hdr := buffer.NewPrependable(header.EthernetMinimumSize)
proto := tcpip.NetworkProtocolNumber(rand.Intn(0x10000))
- if err := c.ep.WritePacket(&r, nil /* gso */, proto, stack.PacketBuffer{
+ if err := c.ep.WritePacket(&r, nil /* gso */, proto, &stack.PacketBuffer{
Header: hdr,
}); err != nil {
t.Fatalf("WritePacket failed: %v", err)
@@ -401,7 +401,7 @@ func TestFillTxQueue(t *testing.T) {
for i := queuePipeSize / 40; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buf.ToVectorisedView(),
}); err != nil {
@@ -419,7 +419,7 @@ func TestFillTxQueue(t *testing.T) {
// Next attempt to write must fail.
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buf.ToVectorisedView(),
}); err != want {
@@ -447,7 +447,7 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) {
// Send two packets so that the id slice has at least two slots.
for i := 2; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buf.ToVectorisedView(),
}); err != nil {
@@ -470,7 +470,7 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) {
ids := make(map[uint64]struct{})
for i := queuePipeSize / 40; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buf.ToVectorisedView(),
}); err != nil {
@@ -488,7 +488,7 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) {
// Next attempt to write must fail.
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buf.ToVectorisedView(),
}); err != want {
@@ -514,7 +514,7 @@ func TestFillTxMemory(t *testing.T) {
ids := make(map[uint64]struct{})
for i := queueDataSize / bufferSize; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buf.ToVectorisedView(),
}); err != nil {
@@ -533,7 +533,7 @@ func TestFillTxMemory(t *testing.T) {
// Next attempt to write must fail.
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buf.ToVectorisedView(),
})
@@ -561,7 +561,7 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) {
// until there is only one buffer left.
for i := queueDataSize/bufferSize - 1; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buf.ToVectorisedView(),
}); err != nil {
@@ -577,7 +577,7 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) {
{
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
uu := buffer.NewView(bufferSize).ToVectorisedView()
- if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: uu,
}); err != want {
@@ -588,7 +588,7 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) {
// Attempt to write the one-buffer packet again. It must succeed.
{
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
Data: buf.ToVectorisedView(),
}); err != nil {
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index da1c520ae..ae3186314 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -120,9 +120,9 @@ func NewWithWriter(lower stack.LinkEndpoint, writer io.Writer, snapLen uint32) (
// DeliverNetworkPacket implements the stack.NetworkDispatcher interface. It is
// called by the link-layer endpoint being wrapped when a packet arrives, and
// logs the packet before forwarding to the actual dispatcher.
-func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
- e.dumpPacket("recv", nil, protocol, &pkt)
- e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, pkt)
+func (e *endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+ e.dumpPacket("recv", nil, protocol, pkt)
+ e.dispatcher.DeliverNetworkPacket(remote, local, protocol, pkt)
}
// Attach implements the stack.LinkEndpoint interface. It saves the dispatcher
@@ -208,8 +208,8 @@ func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.Netw
// WritePacket implements the stack.LinkEndpoint interface. It is called by
// higher-level protocols to write packets; it just logs the packet and
// forwards the request to the lower endpoint.
-func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
- e.dumpPacket("send", gso, protocol, &pkt)
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
+ e.dumpPacket("send", gso, protocol, pkt)
return e.lower.WritePacket(r, gso, protocol, pkt)
}
diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go
index 617446ea2..6bc9033d0 100644
--- a/pkg/tcpip/link/tun/device.go
+++ b/pkg/tcpip/link/tun/device.go
@@ -213,7 +213,7 @@ func (d *Device) Write(data []byte) (int64, error) {
remote = tcpip.LinkAddress(zeroMAC[:])
}
- pkt := stack.PacketBuffer{
+ pkt := &stack.PacketBuffer{
Data: buffer.View(data).ToVectorisedView(),
}
if ethHdr != nil {
diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go
index 2b3741276..949b3f2b2 100644
--- a/pkg/tcpip/link/waitable/waitable.go
+++ b/pkg/tcpip/link/waitable/waitable.go
@@ -50,12 +50,12 @@ func New(lower stack.LinkEndpoint) *Endpoint {
// It is called by the link-layer endpoint being wrapped when a packet arrives,
// and only forwards to the actual dispatcher if Wait or WaitDispatch haven't
// been called.
-func (e *Endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+func (e *Endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
if !e.dispatchGate.Enter() {
return
}
- e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, pkt)
+ e.dispatcher.DeliverNetworkPacket(remote, local, protocol, pkt)
e.dispatchGate.Leave()
}
@@ -99,7 +99,7 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress {
// WritePacket implements stack.LinkEndpoint.WritePacket. It is called by
// higher-level protocols to write packets. It only forwards packets to the
// lower endpoint if Wait or WaitWrite haven't been called.
-func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
if !e.writeGate.Enter() {
return nil
}
diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go
index 54eb5322b..63bf40562 100644
--- a/pkg/tcpip/link/waitable/waitable_test.go
+++ b/pkg/tcpip/link/waitable/waitable_test.go
@@ -35,7 +35,7 @@ type countedEndpoint struct {
dispatcher stack.NetworkDispatcher
}
-func (e *countedEndpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+func (e *countedEndpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
e.dispatchCount++
}
@@ -65,7 +65,7 @@ func (e *countedEndpoint) LinkAddress() tcpip.LinkAddress {
return e.linkAddr
}
-func (e *countedEndpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *countedEndpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
e.writeCount++
return nil
}
@@ -89,21 +89,21 @@ func TestWaitWrite(t *testing.T) {
wep := New(ep)
// Write and check that it goes through.
- wep.WritePacket(nil, nil /* gso */, 0, stack.PacketBuffer{})
+ wep.WritePacket(nil, nil /* gso */, 0, &stack.PacketBuffer{})
if want := 1; ep.writeCount != want {
t.Fatalf("Unexpected writeCount: got=%v, want=%v", ep.writeCount, want)
}
// Wait on dispatches, then try to write. It must go through.
wep.WaitDispatch()
- wep.WritePacket(nil, nil /* gso */, 0, stack.PacketBuffer{})
+ wep.WritePacket(nil, nil /* gso */, 0, &stack.PacketBuffer{})
if want := 2; ep.writeCount != want {
t.Fatalf("Unexpected writeCount: got=%v, want=%v", ep.writeCount, want)
}
// Wait on writes, then try to write. It must not go through.
wep.WaitWrite()
- wep.WritePacket(nil, nil /* gso */, 0, stack.PacketBuffer{})
+ wep.WritePacket(nil, nil /* gso */, 0, &stack.PacketBuffer{})
if want := 2; ep.writeCount != want {
t.Fatalf("Unexpected writeCount: got=%v, want=%v", ep.writeCount, want)
}
@@ -120,21 +120,21 @@ func TestWaitDispatch(t *testing.T) {
}
// Dispatch and check that it goes through.
- ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, stack.PacketBuffer{})
+ ep.dispatcher.DeliverNetworkPacket("", "", 0, &stack.PacketBuffer{})
if want := 1; ep.dispatchCount != want {
t.Fatalf("Unexpected dispatchCount: got=%v, want=%v", ep.dispatchCount, want)
}
// Wait on writes, then try to dispatch. It must go through.
wep.WaitWrite()
- ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, stack.PacketBuffer{})
+ ep.dispatcher.DeliverNetworkPacket("", "", 0, &stack.PacketBuffer{})
if want := 2; ep.dispatchCount != want {
t.Fatalf("Unexpected dispatchCount: got=%v, want=%v", ep.dispatchCount, want)
}
// Wait on dispatches, then try to dispatch. It must not go through.
wep.WaitDispatch()
- ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, stack.PacketBuffer{})
+ ep.dispatcher.DeliverNetworkPacket("", "", 0, &stack.PacketBuffer{})
if want := 2; ep.dispatchCount != want {
t.Fatalf("Unexpected dispatchCount: got=%v, want=%v", ep.dispatchCount, want)
}
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index 9d0797af7..ea1acba83 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -80,7 +80,7 @@ func (e *endpoint) MaxHeaderLength() uint16 {
func (e *endpoint) Close() {}
-func (e *endpoint) WritePacket(*stack.Route, *stack.GSO, stack.NetworkHeaderParams, stack.PacketBuffer) *tcpip.Error {
+func (e *endpoint) WritePacket(*stack.Route, *stack.GSO, stack.NetworkHeaderParams, *stack.PacketBuffer) *tcpip.Error {
return tcpip.ErrNotSupported
}
@@ -94,11 +94,11 @@ func (e *endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList
return 0, tcpip.ErrNotSupported
}
-func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error {
return tcpip.ErrNotSupported
}
-func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
v, ok := pkt.Data.PullUp(header.ARPSize)
if !ok {
return
@@ -122,7 +122,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
copy(packet.ProtocolAddressSender(), h.ProtocolAddressTarget())
copy(packet.HardwareAddressTarget(), h.HardwareAddressSender())
copy(packet.ProtocolAddressTarget(), h.ProtocolAddressSender())
- e.linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, stack.PacketBuffer{
+ e.linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
})
fallthrough // also fill the cache from requests
@@ -177,7 +177,7 @@ func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, linkEP stack.
copy(h.ProtocolAddressSender(), localAddr)
copy(h.ProtocolAddressTarget(), addr)
- return linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, stack.PacketBuffer{
+ return linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
})
}
diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go
index 1646d9cde..66e67429c 100644
--- a/pkg/tcpip/network/arp/arp_test.go
+++ b/pkg/tcpip/network/arp/arp_test.go
@@ -103,7 +103,7 @@ func TestDirectRequest(t *testing.T) {
inject := func(addr tcpip.Address) {
copy(h.ProtocolAddressTarget(), addr)
- c.linkEP.InjectInbound(arp.ProtocolNumber, stack.PacketBuffer{
+ c.linkEP.InjectInbound(arp.ProtocolNumber, &stack.PacketBuffer{
Data: v.ToVectorisedView(),
})
}
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index 4c20301c6..d9b62f2db 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -96,7 +96,7 @@ func (t *testObject) checkValues(protocol tcpip.TransportProtocolNumber, vv buff
// DeliverTransportPacket is called by network endpoints after parsing incoming
// packets. This is used by the test object to verify that the results of the
// parsing are expected.
-func (t *testObject) DeliverTransportPacket(r *stack.Route, protocol tcpip.TransportProtocolNumber, pkt stack.PacketBuffer) {
+func (t *testObject) DeliverTransportPacket(r *stack.Route, protocol tcpip.TransportProtocolNumber, pkt *stack.PacketBuffer) {
t.checkValues(protocol, pkt.Data, r.RemoteAddress, r.LocalAddress)
t.dataCalls++
}
@@ -104,7 +104,7 @@ func (t *testObject) DeliverTransportPacket(r *stack.Route, protocol tcpip.Trans
// DeliverTransportControlPacket is called by network endpoints after parsing
// incoming control (ICMP) packets. This is used by the test object to verify
// that the results of the parsing are expected.
-func (t *testObject) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) {
+func (t *testObject) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
t.checkValues(trans, pkt.Data, remote, local)
if typ != t.typ {
t.t.Errorf("typ = %v, want %v", typ, t.typ)
@@ -150,7 +150,7 @@ func (*testObject) Wait() {}
// WritePacket is called by network endpoints after producing a packet and
// writing it to the link endpoint. This is used by the test object to verify
// that the produced packet is as expected.
-func (t *testObject) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+func (t *testObject) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
var prot tcpip.TransportProtocolNumber
var srcAddr tcpip.Address
var dstAddr tcpip.Address
@@ -246,7 +246,11 @@ func TestIPv4Send(t *testing.T) {
if err != nil {
t.Fatalf("could not find route: %v", err)
}
- if err := ep.WritePacket(&r, nil /* gso */, stack.NetworkHeaderParams{Protocol: 123, TTL: 123, TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ if err := ep.WritePacket(&r, nil /* gso */, stack.NetworkHeaderParams{
+ Protocol: 123,
+ TTL: 123,
+ TOS: stack.DefaultTOS,
+ }, &stack.PacketBuffer{
Header: hdr,
Data: payload.ToVectorisedView(),
}); err != nil {
@@ -289,7 +293,7 @@ func TestIPv4Receive(t *testing.T) {
if err != nil {
t.Fatalf("could not find route: %v", err)
}
- ep.HandlePacket(&r, stack.PacketBuffer{
+ ep.HandlePacket(&r, &stack.PacketBuffer{
Data: view.ToVectorisedView(),
})
if o.dataCalls != 1 {
@@ -379,7 +383,7 @@ func TestIPv4ReceiveControl(t *testing.T) {
o.extra = c.expectedExtra
vv := view[:len(view)-c.trunc].ToVectorisedView()
- ep.HandlePacket(&r, stack.PacketBuffer{
+ ep.HandlePacket(&r, &stack.PacketBuffer{
Data: vv,
})
if want := c.expectedCount; o.controlCalls != want {
@@ -444,7 +448,7 @@ func TestIPv4FragmentationReceive(t *testing.T) {
}
// Send first segment.
- ep.HandlePacket(&r, stack.PacketBuffer{
+ ep.HandlePacket(&r, &stack.PacketBuffer{
Data: frag1.ToVectorisedView(),
})
if o.dataCalls != 0 {
@@ -452,7 +456,7 @@ func TestIPv4FragmentationReceive(t *testing.T) {
}
// Send second segment.
- ep.HandlePacket(&r, stack.PacketBuffer{
+ ep.HandlePacket(&r, &stack.PacketBuffer{
Data: frag2.ToVectorisedView(),
})
if o.dataCalls != 1 {
@@ -487,7 +491,11 @@ func TestIPv6Send(t *testing.T) {
if err != nil {
t.Fatalf("could not find route: %v", err)
}
- if err := ep.WritePacket(&r, nil /* gso */, stack.NetworkHeaderParams{Protocol: 123, TTL: 123, TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ if err := ep.WritePacket(&r, nil /* gso */, stack.NetworkHeaderParams{
+ Protocol: 123,
+ TTL: 123,
+ TOS: stack.DefaultTOS,
+ }, &stack.PacketBuffer{
Header: hdr,
Data: payload.ToVectorisedView(),
}); err != nil {
@@ -530,7 +538,7 @@ func TestIPv6Receive(t *testing.T) {
t.Fatalf("could not find route: %v", err)
}
- ep.HandlePacket(&r, stack.PacketBuffer{
+ ep.HandlePacket(&r, &stack.PacketBuffer{
Data: view.ToVectorisedView(),
})
if o.dataCalls != 1 {
@@ -644,7 +652,7 @@ func TestIPv6ReceiveControl(t *testing.T) {
// Set ICMPv6 checksum.
icmp.SetChecksum(header.ICMPv6Checksum(icmp, outerSrcAddr, localIpv6Addr, buffer.VectorisedView{}))
- ep.HandlePacket(&r, stack.PacketBuffer{
+ ep.HandlePacket(&r, &stack.PacketBuffer{
Data: view[:len(view)-c.trunc].ToVectorisedView(),
})
if want := c.expectedCount; o.controlCalls != want {
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 4cbefe5ab..d1c3ae835 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -24,7 +24,7 @@ import (
// the original packet that caused the ICMP one to be sent. This information is
// used to find out which transport endpoint must be notified about the ICMP
// packet.
-func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) {
+func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
if !ok {
return
@@ -56,7 +56,7 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.
e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
}
-func (e *endpoint) handleICMP(r *stack.Route, pkt stack.PacketBuffer) {
+func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
stats := r.Stats()
received := stats.ICMP.V4PacketsReceived
v, ok := pkt.Data.PullUp(header.ICMPv4MinimumSize)
@@ -88,7 +88,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt stack.PacketBuffer) {
// It's possible that a raw socket expects to receive this.
h.SetChecksum(wantChecksum)
- e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, stack.PacketBuffer{
+ e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, &stack.PacketBuffer{
Data: pkt.Data.Clone(nil),
NetworkHeader: append(buffer.View(nil), pkt.NetworkHeader...),
})
@@ -102,7 +102,11 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt stack.PacketBuffer) {
pkt.SetChecksum(0)
pkt.SetChecksum(^header.Checksum(pkt, header.ChecksumVV(vv, 0)))
sent := stats.ICMP.V4PacketsSent
- if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv4ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{
+ Protocol: header.ICMPv4ProtocolNumber,
+ TTL: r.DefaultTTL(),
+ TOS: stack.DefaultTOS,
+ }, &stack.PacketBuffer{
Header: hdr,
Data: vv,
TransportHeader: buffer.View(pkt),
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 64046cbbf..9cd7592f4 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -129,7 +129,7 @@ func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
// packet's stated length matches the length of the header+payload. mtu
// includes the IP header and options. This does not support the DontFragment
// IP flag.
-func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, mtu int, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, mtu int, pkt *stack.PacketBuffer) *tcpip.Error {
// This packet is too big, it needs to be fragmented.
ip := header.IPv4(pkt.Header.View())
flags := ip.Flags()
@@ -169,7 +169,7 @@ func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, mtu int,
if i > 0 {
newPayload := pkt.Data.Clone(nil)
newPayload.CapLength(innerMTU)
- if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, stack.PacketBuffer{
+ if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, &stack.PacketBuffer{
Header: pkt.Header,
Data: newPayload,
NetworkHeader: buffer.View(h),
@@ -188,7 +188,7 @@ func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, mtu int,
newPayload := pkt.Data.Clone(nil)
newPayloadLength := outerMTU - pkt.Header.UsedLength()
newPayload.CapLength(newPayloadLength)
- if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, stack.PacketBuffer{
+ if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, &stack.PacketBuffer{
Header: pkt.Header,
Data: newPayload,
NetworkHeader: buffer.View(h),
@@ -202,7 +202,7 @@ func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, mtu int,
startOfHdr := pkt.Header
startOfHdr.TrimBack(pkt.Header.UsedLength() - outerMTU)
emptyVV := buffer.NewVectorisedView(0, []buffer.View{})
- if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, stack.PacketBuffer{
+ if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, &stack.PacketBuffer{
Header: startOfHdr,
Data: emptyVV,
NetworkHeader: buffer.View(h),
@@ -245,7 +245,7 @@ func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadS
}
// WritePacket writes a packet to the given destination address and protocol.
-func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error {
ip := e.addIPHeader(r, &pkt.Header, pkt.Data.Size(), params)
pkt.NetworkHeader = buffer.View(ip)
@@ -253,7 +253,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
// iptables filtering. All packets that reach here are locally
// generated.
ipt := e.stack.IPTables()
- if ok := ipt.Check(stack.Output, &pkt, gso, r, "", nicName); !ok {
+ if ok := ipt.Check(stack.Output, pkt, gso, r, "", nicName); !ok {
// iptables is telling us to drop the packet.
return nil
}
@@ -271,9 +271,9 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
views := make([]buffer.View, 1, 1+len(pkt.Data.Views()))
views[0] = pkt.Header.View()
views = append(views, pkt.Data.Views()...)
- packet := stack.PacketBuffer{
- Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views)}
- ep.HandlePacket(&route, packet)
+ ep.HandlePacket(&route, &stack.PacketBuffer{
+ Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views),
+ })
return nil
}
}
@@ -286,7 +286,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
views = append(views, pkt.Data.Views()...)
loopedR := r.MakeLoopedRoute()
- e.HandlePacket(&loopedR, stack.PacketBuffer{
+ e.HandlePacket(&loopedR, &stack.PacketBuffer{
Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views),
})
@@ -351,14 +351,14 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
views := make([]buffer.View, 1, 1+len(pkt.Data.Views()))
views[0] = pkt.Header.View()
views = append(views, pkt.Data.Views()...)
- packet := stack.PacketBuffer{
- Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views)}
- ep.HandlePacket(&route, packet)
+ ep.HandlePacket(&route, &stack.PacketBuffer{
+ Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views),
+ })
n++
continue
}
}
- if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, *pkt); err != nil {
+ if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, pkt); err != nil {
r.Stats().IP.PacketsSent.IncrementBy(uint64(n))
return n, err
}
@@ -370,7 +370,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
// WriteHeaderIncludedPacket writes a packet already containing a network
// header through the given route.
-func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error {
// The packet already has an IP header, but there are a few required
// checks.
h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
@@ -426,7 +426,7 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuf
// HandlePacket is called by the link layer when new ipv4 packets arrive for
// this endpoint.
-func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
headerView, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
if !ok {
r.Stats().IP.MalformedPacketsReceived.Increment()
@@ -447,7 +447,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
// iptables filtering. All packets that reach here are intended for
// this machine and will not be forwarded.
ipt := e.stack.IPTables()
- if ok := ipt.Check(stack.Input, &pkt, nil, nil, "", ""); !ok {
+ if ok := ipt.Check(stack.Input, pkt, nil, nil, "", ""); !ok {
// iptables is telling us to drop the packet.
return
}
diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go
index 36035c820..c208ebd99 100644
--- a/pkg/tcpip/network/ipv4/ipv4_test.go
+++ b/pkg/tcpip/network/ipv4/ipv4_test.go
@@ -114,7 +114,7 @@ func makeHdrAndPayload(hdrLength int, extraLength int, viewSizes []int) (buffer.
// comparePayloads compared the contents of all the packets against the contents
// of the source packet.
-func compareFragments(t *testing.T, packets []stack.PacketBuffer, sourcePacketInfo stack.PacketBuffer, mtu uint32) {
+func compareFragments(t *testing.T, packets []*stack.PacketBuffer, sourcePacketInfo *stack.PacketBuffer, mtu uint32) {
t.Helper()
// Make a complete array of the sourcePacketInfo packet.
source := header.IPv4(packets[0].Header.View()[:header.IPv4MinimumSize])
@@ -174,7 +174,7 @@ func compareFragments(t *testing.T, packets []stack.PacketBuffer, sourcePacketIn
type errorChannel struct {
*channel.Endpoint
- Ch chan stack.PacketBuffer
+ Ch chan *stack.PacketBuffer
packetCollectorErrors []*tcpip.Error
}
@@ -184,7 +184,7 @@ type errorChannel struct {
func newErrorChannel(size int, mtu uint32, linkAddr tcpip.LinkAddress, packetCollectorErrors []*tcpip.Error) *errorChannel {
return &errorChannel{
Endpoint: channel.New(size, mtu, linkAddr),
- Ch: make(chan stack.PacketBuffer, size),
+ Ch: make(chan *stack.PacketBuffer, size),
packetCollectorErrors: packetCollectorErrors,
}
}
@@ -203,7 +203,7 @@ func (e *errorChannel) Drain() int {
}
// WritePacket stores outbound packets into the channel.
-func (e *errorChannel) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *errorChannel) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
select {
case e.Ch <- pkt:
default:
@@ -282,13 +282,17 @@ func TestFragmentation(t *testing.T) {
for _, ft := range fragTests {
t.Run(ft.description, func(t *testing.T) {
hdr, payload := makeHdrAndPayload(ft.hdrLength, ft.extraLength, ft.payloadViewsSizes)
- source := stack.PacketBuffer{
+ source := &stack.PacketBuffer{
Header: hdr,
// Save the source payload because WritePacket will modify it.
Data: payload.Clone(nil),
}
c := buildContext(t, nil, ft.mtu)
- err := c.Route.WritePacket(ft.gso, stack.NetworkHeaderParams{Protocol: tcp.ProtocolNumber, TTL: 42, TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ err := c.Route.WritePacket(ft.gso, stack.NetworkHeaderParams{
+ Protocol: tcp.ProtocolNumber,
+ TTL: 42,
+ TOS: stack.DefaultTOS,
+ }, &stack.PacketBuffer{
Header: hdr,
Data: payload,
})
@@ -296,7 +300,7 @@ func TestFragmentation(t *testing.T) {
t.Errorf("err got %v, want %v", err, nil)
}
- var results []stack.PacketBuffer
+ var results []*stack.PacketBuffer
L:
for {
select {
@@ -338,7 +342,11 @@ func TestFragmentationErrors(t *testing.T) {
t.Run(ft.description, func(t *testing.T) {
hdr, payload := makeHdrAndPayload(ft.hdrLength, header.IPv4MinimumSize, ft.payloadViewsSizes)
c := buildContext(t, ft.packetCollectorErrors, ft.mtu)
- err := c.Route.WritePacket(&stack.GSO{}, stack.NetworkHeaderParams{Protocol: tcp.ProtocolNumber, TTL: 42, TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ err := c.Route.WritePacket(&stack.GSO{}, stack.NetworkHeaderParams{
+ Protocol: tcp.ProtocolNumber,
+ TTL: 42,
+ TOS: stack.DefaultTOS,
+ }, &stack.PacketBuffer{
Header: hdr,
Data: payload,
})
@@ -460,7 +468,7 @@ func TestInvalidFragments(t *testing.T) {
s.CreateNIC(nicID, sniffer.New(ep))
for _, pkt := range tc.packets {
- ep.InjectLinkAddr(header.IPv4ProtocolNumber, remoteLinkAddr, stack.PacketBuffer{
+ ep.InjectLinkAddr(header.IPv4ProtocolNumber, remoteLinkAddr, &stack.PacketBuffer{
Data: buffer.NewVectorisedView(len(pkt), []buffer.View{pkt}),
})
}
@@ -698,7 +706,7 @@ func TestReceiveFragments(t *testing.T) {
vv := hdr.View().ToVectorisedView()
vv.AppendView(frag.payload)
- e.InjectInbound(header.IPv4ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(header.IPv4ProtocolNumber, &stack.PacketBuffer{
Data: vv,
})
}
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index bdf3a0d25..b62fb1de6 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -27,7 +27,7 @@ import (
// the original packet that caused the ICMP one to be sent. This information is
// used to find out which transport endpoint must be notified about the ICMP
// packet.
-func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) {
+func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
h, ok := pkt.Data.PullUp(header.IPv6MinimumSize)
if !ok {
return
@@ -70,7 +70,7 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.
e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
}
-func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.PacketBuffer, hasFragmentHeader bool) {
+func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt *stack.PacketBuffer, hasFragmentHeader bool) {
stats := r.Stats().ICMP
sent := stats.V6PacketsSent
received := stats.V6PacketsReceived
@@ -288,7 +288,7 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
//
// The IP Hop Limit field has a value of 255, i.e., the packet
// could not possibly have been forwarded by a router.
- if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: header.NDPHopLimit, TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: header.NDPHopLimit, TOS: stack.DefaultTOS}, &stack.PacketBuffer{
Header: hdr,
}); err != nil {
sent.Dropped.Increment()
@@ -390,7 +390,7 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
copy(packet, icmpHdr)
packet.SetType(header.ICMPv6EchoReply)
packet.SetChecksum(header.ICMPv6Checksum(packet, r.LocalAddress, r.RemoteAddress, pkt.Data))
- if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, &stack.PacketBuffer{
Header: hdr,
Data: pkt.Data,
}); err != nil {
@@ -532,7 +532,7 @@ func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, linkEP stack.
})
// TODO(stijlist): count this in ICMP stats.
- return linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, stack.PacketBuffer{
+ return linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, &stack.PacketBuffer{
Header: hdr,
})
}
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index d412ff688..a720f626f 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -57,7 +57,7 @@ func (*stubLinkEndpoint) LinkAddress() tcpip.LinkAddress {
return ""
}
-func (*stubLinkEndpoint) WritePacket(*stack.Route, *stack.GSO, tcpip.NetworkProtocolNumber, stack.PacketBuffer) *tcpip.Error {
+func (*stubLinkEndpoint) WritePacket(*stack.Route, *stack.GSO, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) *tcpip.Error {
return nil
}
@@ -67,7 +67,7 @@ type stubDispatcher struct {
stack.TransportDispatcher
}
-func (*stubDispatcher) DeliverTransportPacket(*stack.Route, tcpip.TransportProtocolNumber, stack.PacketBuffer) {
+func (*stubDispatcher) DeliverTransportPacket(*stack.Route, tcpip.TransportProtocolNumber, *stack.PacketBuffer) {
}
type stubLinkAddressCache struct {
@@ -189,7 +189,7 @@ func TestICMPCounts(t *testing.T) {
SrcAddr: r.LocalAddress,
DstAddr: r.RemoteAddress,
})
- ep.HandlePacket(&r, stack.PacketBuffer{
+ ep.HandlePacket(&r, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
}
@@ -328,7 +328,7 @@ func routeICMPv6Packet(t *testing.T, args routeArgs, fn func(*testing.T, header.
views := []buffer.View{pi.Pkt.Header.View(), pi.Pkt.Data.ToView()}
size := pi.Pkt.Header.UsedLength() + pi.Pkt.Data.Size()
vv := buffer.NewVectorisedView(size, views)
- args.dst.InjectLinkAddr(pi.Proto, args.dst.LinkAddress(), stack.PacketBuffer{
+ args.dst.InjectLinkAddr(pi.Proto, args.dst.LinkAddress(), &stack.PacketBuffer{
Data: vv,
})
}
@@ -563,7 +563,7 @@ func TestICMPChecksumValidationSimple(t *testing.T) {
SrcAddr: lladdr1,
DstAddr: lladdr0,
})
- e.InjectInbound(ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
}
@@ -740,7 +740,7 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) {
SrcAddr: lladdr1,
DstAddr: lladdr0,
})
- e.InjectInbound(ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
}
@@ -918,7 +918,7 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
SrcAddr: lladdr1,
DstAddr: lladdr0,
})
- e.InjectInbound(ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
Data: buffer.NewVectorisedView(header.IPv6MinimumSize+size+payloadSize, []buffer.View{hdr.View(), payload}),
})
}
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index daf1fcbc6..0d94ad122 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -116,7 +116,7 @@ func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadS
}
// WritePacket writes a packet to the given destination address and protocol.
-func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt stack.PacketBuffer) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error {
ip := e.addIPHeader(r, &pkt.Header, pkt.Data.Size(), params)
pkt.NetworkHeader = buffer.View(ip)
@@ -128,7 +128,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
views = append(views, pkt.Data.Views()...)
loopedR := r.MakeLoopedRoute()
- e.HandlePacket(&loopedR, stack.PacketBuffer{
+ e.HandlePacket(&loopedR, &stack.PacketBuffer{
Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views),
})
@@ -163,14 +163,14 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
// WriteHeaderIncludedPacker implements stack.NetworkEndpoint. It is not yet
// supported by IPv6.
-func (*endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuffer) *tcpip.Error {
+func (*endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error {
// TODO(b/146666412): Support IPv6 header-included packets.
return tcpip.ErrNotSupported
}
// HandlePacket is called by the link layer when new ipv6 packets arrive for
// this endpoint.
-func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
headerView, ok := pkt.Data.PullUp(header.IPv6MinimumSize)
if !ok {
r.Stats().IP.MalformedPacketsReceived.Increment()
diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go
index 841a0cb7a..213ff64f2 100644
--- a/pkg/tcpip/network/ipv6/ipv6_test.go
+++ b/pkg/tcpip/network/ipv6/ipv6_test.go
@@ -65,7 +65,7 @@ func testReceiveICMP(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst
DstAddr: dst,
})
- e.InjectInbound(ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
@@ -123,7 +123,7 @@ func testReceiveUDP(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst
DstAddr: dst,
})
- e.InjectInbound(ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
@@ -637,7 +637,7 @@ func TestReceiveIPv6ExtHdrs(t *testing.T) {
DstAddr: addr2,
})
- e.InjectInbound(ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
@@ -1238,7 +1238,7 @@ func TestReceiveIPv6Fragments(t *testing.T) {
vv := hdr.View().ToVectorisedView()
vv.Append(f.data)
- e.InjectInbound(ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
Data: vv,
})
}
diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go
index 12b70f7e9..3c141b91b 100644
--- a/pkg/tcpip/network/ipv6/ndp_test.go
+++ b/pkg/tcpip/network/ipv6/ndp_test.go
@@ -136,7 +136,7 @@ func TestNeighorSolicitationWithSourceLinkLayerOption(t *testing.T) {
t.Fatalf("got invalid = %d, want = 0", got)
}
- e.InjectInbound(ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
@@ -380,7 +380,7 @@ func TestNeighorSolicitationResponse(t *testing.T) {
t.Fatalf("got invalid = %d, want = 0", got)
}
- e.InjectLinkAddr(ProtocolNumber, test.nsSrcLinkAddr, stack.PacketBuffer{
+ e.InjectLinkAddr(ProtocolNumber, test.nsSrcLinkAddr, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
@@ -497,7 +497,7 @@ func TestNeighorAdvertisementWithTargetLinkLayerOption(t *testing.T) {
t.Fatalf("got invalid = %d, want = 0", got)
}
- e.InjectInbound(ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
@@ -568,7 +568,7 @@ func TestNDPValidation(t *testing.T) {
SrcAddr: r.LocalAddress,
DstAddr: r.RemoteAddress,
})
- ep.HandlePacket(r, stack.PacketBuffer{
+ ep.HandlePacket(r, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
}
@@ -884,7 +884,7 @@ func TestRouterAdvertValidation(t *testing.T) {
t.Fatalf("got rxRA = %d, want = 0", got)
}
- e.InjectInbound(header.IPv6ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(header.IPv6ProtocolNumber, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go
index 7d1ede1f2..d4053be08 100644
--- a/pkg/tcpip/stack/conntrack.go
+++ b/pkg/tcpip/stack/conntrack.go
@@ -186,7 +186,7 @@ func parseHeaders(pkt *PacketBuffer) {
}
// packetToTuple converts packet to a tuple in original direction.
-func packetToTuple(pkt PacketBuffer, hook Hook) (connTrackTuple, *tcpip.Error) {
+func packetToTuple(pkt *PacketBuffer, hook Hook) (connTrackTuple, *tcpip.Error) {
var tuple connTrackTuple
netHeader := header.IPv4(pkt.NetworkHeader)
@@ -265,7 +265,7 @@ func (ct *ConnTrackTable) connTrackForPacket(pkt *PacketBuffer, hook Hook, creat
}
var dir ctDirection
- tuple, err := packetToTuple(*pkt, hook)
+ tuple, err := packetToTuple(pkt, hook)
if err != nil {
return nil, dir
}
diff --git a/pkg/tcpip/stack/forwarder.go b/pkg/tcpip/stack/forwarder.go
index 6b64cd37f..3eff141e6 100644
--- a/pkg/tcpip/stack/forwarder.go
+++ b/pkg/tcpip/stack/forwarder.go
@@ -32,7 +32,7 @@ type pendingPacket struct {
nic *NIC
route *Route
proto tcpip.NetworkProtocolNumber
- pkt PacketBuffer
+ pkt *PacketBuffer
}
type forwardQueue struct {
@@ -50,7 +50,7 @@ func newForwardQueue() *forwardQueue {
return &forwardQueue{packets: make(map[<-chan struct{}][]*pendingPacket)}
}
-func (f *forwardQueue) enqueue(ch <-chan struct{}, n *NIC, r *Route, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) {
+func (f *forwardQueue) enqueue(ch <-chan struct{}, n *NIC, r *Route, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
shouldWait := false
f.Lock()
diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go
index 8084d50bc..63537aaad 100644
--- a/pkg/tcpip/stack/forwarder_test.go
+++ b/pkg/tcpip/stack/forwarder_test.go
@@ -68,7 +68,7 @@ func (f *fwdTestNetworkEndpoint) ID() *NetworkEndpointID {
return &f.id
}
-func (f *fwdTestNetworkEndpoint) HandlePacket(r *Route, pkt PacketBuffer) {
+func (f *fwdTestNetworkEndpoint) HandlePacket(r *Route, pkt *PacketBuffer) {
// Consume the network header.
b, ok := pkt.Data.PullUp(fwdTestNetHeaderLen)
if !ok {
@@ -96,7 +96,7 @@ func (f *fwdTestNetworkEndpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNu
return f.proto.Number()
}
-func (f *fwdTestNetworkEndpoint) WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt PacketBuffer) *tcpip.Error {
+func (f *fwdTestNetworkEndpoint) WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) *tcpip.Error {
// Add the protocol's header to the packet and send it to the link
// endpoint.
b := pkt.Header.Prepend(fwdTestNetHeaderLen)
@@ -112,7 +112,7 @@ func (f *fwdTestNetworkEndpoint) WritePackets(r *Route, gso *GSO, pkts PacketBuf
panic("not implemented")
}
-func (*fwdTestNetworkEndpoint) WriteHeaderIncludedPacket(r *Route, pkt PacketBuffer) *tcpip.Error {
+func (*fwdTestNetworkEndpoint) WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) *tcpip.Error {
return tcpip.ErrNotSupported
}
@@ -190,7 +190,7 @@ func (f *fwdTestNetworkProtocol) LinkAddressProtocol() tcpip.NetworkProtocolNumb
type fwdTestPacketInfo struct {
RemoteLinkAddress tcpip.LinkAddress
LocalLinkAddress tcpip.LinkAddress
- Pkt PacketBuffer
+ Pkt *PacketBuffer
}
type fwdTestLinkEndpoint struct {
@@ -203,13 +203,13 @@ type fwdTestLinkEndpoint struct {
}
// InjectInbound injects an inbound packet.
-func (e *fwdTestLinkEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) {
+func (e *fwdTestLinkEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
e.InjectLinkAddr(protocol, "", pkt)
}
// InjectLinkAddr injects an inbound packet with a remote link address.
-func (e *fwdTestLinkEndpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, pkt PacketBuffer) {
- e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, pkt)
+func (e *fwdTestLinkEndpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, pkt *PacketBuffer) {
+ e.dispatcher.DeliverNetworkPacket(remote, "" /* local */, protocol, pkt)
}
// Attach saves the stack network-layer dispatcher for use later when packets
@@ -251,7 +251,7 @@ func (e *fwdTestLinkEndpoint) LinkAddress() tcpip.LinkAddress {
return e.linkAddr
}
-func (e fwdTestLinkEndpoint) WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) *tcpip.Error {
+func (e fwdTestLinkEndpoint) WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) *tcpip.Error {
p := fwdTestPacketInfo{
RemoteLinkAddress: r.RemoteLinkAddress,
LocalLinkAddress: r.LocalLinkAddress,
@@ -270,7 +270,7 @@ func (e fwdTestLinkEndpoint) WritePacket(r *Route, gso *GSO, protocol tcpip.Netw
func (e *fwdTestLinkEndpoint) WritePackets(r *Route, gso *GSO, pkts PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
n := 0
for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
- e.WritePacket(r, gso, protocol, *pkt)
+ e.WritePacket(r, gso, protocol, pkt)
n++
}
@@ -280,7 +280,7 @@ func (e *fwdTestLinkEndpoint) WritePackets(r *Route, gso *GSO, pkts PacketBuffer
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
func (e *fwdTestLinkEndpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
p := fwdTestPacketInfo{
- Pkt: PacketBuffer{Data: vv},
+ Pkt: &PacketBuffer{Data: vv},
}
select {
@@ -362,7 +362,7 @@ func TestForwardingWithStaticResolver(t *testing.T) {
// forwarded to NIC 2.
buf := buffer.NewView(30)
buf[0] = 3
- ep1.InjectInbound(fwdTestNetNumber, PacketBuffer{
+ ep1.InjectInbound(fwdTestNetNumber, &PacketBuffer{
Data: buf.ToVectorisedView(),
})
@@ -399,7 +399,7 @@ func TestForwardingWithFakeResolver(t *testing.T) {
// forwarded to NIC 2.
buf := buffer.NewView(30)
buf[0] = 3
- ep1.InjectInbound(fwdTestNetNumber, PacketBuffer{
+ ep1.InjectInbound(fwdTestNetNumber, &PacketBuffer{
Data: buf.ToVectorisedView(),
})
@@ -430,7 +430,7 @@ func TestForwardingWithNoResolver(t *testing.T) {
// forwarded to NIC 2.
buf := buffer.NewView(30)
buf[0] = 3
- ep1.InjectInbound(fwdTestNetNumber, PacketBuffer{
+ ep1.InjectInbound(fwdTestNetNumber, &PacketBuffer{
Data: buf.ToVectorisedView(),
})
@@ -460,7 +460,7 @@ func TestForwardingWithFakeResolverPartialTimeout(t *testing.T) {
// not be forwarded.
buf := buffer.NewView(30)
buf[0] = 4
- ep1.InjectInbound(fwdTestNetNumber, PacketBuffer{
+ ep1.InjectInbound(fwdTestNetNumber, &PacketBuffer{
Data: buf.ToVectorisedView(),
})
@@ -468,7 +468,7 @@ func TestForwardingWithFakeResolverPartialTimeout(t *testing.T) {
// forwarded to NIC 2.
buf = buffer.NewView(30)
buf[0] = 3
- ep1.InjectInbound(fwdTestNetNumber, PacketBuffer{
+ ep1.InjectInbound(fwdTestNetNumber, &PacketBuffer{
Data: buf.ToVectorisedView(),
})
@@ -510,7 +510,7 @@ func TestForwardingWithFakeResolverTwoPackets(t *testing.T) {
for i := 0; i < 2; i++ {
buf := buffer.NewView(30)
buf[0] = 3
- ep1.InjectInbound(fwdTestNetNumber, PacketBuffer{
+ ep1.InjectInbound(fwdTestNetNumber, &PacketBuffer{
Data: buf.ToVectorisedView(),
})
}
@@ -557,7 +557,7 @@ func TestForwardingWithFakeResolverManyPackets(t *testing.T) {
buf[0] = 3
// Set the packet sequence number.
binary.BigEndian.PutUint16(buf[fwdTestNetHeaderLen:], uint16(i))
- ep1.InjectInbound(fwdTestNetNumber, PacketBuffer{
+ ep1.InjectInbound(fwdTestNetNumber, &PacketBuffer{
Data: buf.ToVectorisedView(),
})
}
@@ -610,7 +610,7 @@ func TestForwardingWithFakeResolverManyResolutions(t *testing.T) {
// maxPendingResolutions + 7).
buf := buffer.NewView(30)
buf[0] = byte(3 + i)
- ep1.InjectInbound(fwdTestNetNumber, PacketBuffer{
+ ep1.InjectInbound(fwdTestNetNumber, &PacketBuffer{
Data: buf.ToVectorisedView(),
})
}
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 443423b3c..4e9b404c8 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -16,7 +16,6 @@ package stack
import (
"fmt"
- "strings"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -44,11 +43,11 @@ const HookUnset = -1
// DefaultTables returns a default set of tables. Each chain is set to accept
// all packets.
-func DefaultTables() IPTables {
+func DefaultTables() *IPTables {
// TODO(gvisor.dev/issue/170): We may be able to swap out some strings for
// iotas.
- return IPTables{
- Tables: map[string]Table{
+ return &IPTables{
+ tables: map[string]Table{
TablenameNat: Table{
Rules: []Rule{
Rule{Target: AcceptTarget{}},
@@ -107,7 +106,7 @@ func DefaultTables() IPTables {
UserChains: map[string]int{},
},
},
- Priorities: map[Hook][]string{
+ priorities: map[Hook][]string{
Input: []string{TablenameNat, TablenameFilter},
Prerouting: []string{TablenameMangle, TablenameNat},
Output: []string{TablenameMangle, TablenameNat, TablenameFilter},
@@ -159,6 +158,36 @@ func EmptyNatTable() Table {
}
}
+// GetTable returns table by name.
+func (it *IPTables) GetTable(name string) (Table, bool) {
+ it.mu.RLock()
+ defer it.mu.RUnlock()
+ t, ok := it.tables[name]
+ return t, ok
+}
+
+// ReplaceTable replaces or inserts table by name.
+func (it *IPTables) ReplaceTable(name string, table Table) {
+ it.mu.Lock()
+ defer it.mu.Unlock()
+ it.tables[name] = table
+}
+
+// ModifyTables acquires write-lock and calls fn with internal name-to-table
+// map. This function can be used to update multiple tables atomically.
+func (it *IPTables) ModifyTables(fn func(map[string]Table)) {
+ it.mu.Lock()
+ defer it.mu.Unlock()
+ fn(it.tables)
+}
+
+// GetPriorities returns slice of priorities associated with hook.
+func (it *IPTables) GetPriorities(hook Hook) []string {
+ it.mu.RLock()
+ defer it.mu.RUnlock()
+ return it.priorities[hook]
+}
+
// A chainVerdict is what a table decides should be done with a packet.
type chainVerdict int
@@ -185,8 +214,8 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr
it.connections.HandlePacket(pkt, hook, gso, r)
// Go through each table containing the hook.
- for _, tablename := range it.Priorities[hook] {
- table := it.Tables[tablename]
+ for _, tablename := range it.GetPriorities(hook) {
+ table, _ := it.GetTable(tablename)
ruleIdx := table.BuiltinChains[hook]
switch verdict := it.checkChain(hook, pkt, table, ruleIdx, gso, r, address, nicName); verdict {
// If the table returns Accept, move on to the next table.
@@ -314,7 +343,7 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx
}
// Check whether the packet matches the IP header filter.
- if !filterMatch(rule.Filter, header.IPv4(pkt.NetworkHeader), hook, nicName) {
+ if !rule.Filter.match(header.IPv4(pkt.NetworkHeader), hook, nicName) {
// Continue on to the next rule.
return RuleJump, ruleIdx + 1
}
@@ -322,7 +351,7 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx
// Go through each rule matcher. If they all match, run
// the rule target.
for _, matcher := range rule.Matchers {
- matches, hotdrop := matcher.Match(hook, *pkt, "")
+ matches, hotdrop := matcher.Match(hook, pkt, "")
if hotdrop {
return RuleDrop, 0
}
@@ -335,47 +364,3 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx
// All the matchers matched, so run the target.
return rule.Target.Action(pkt, &it.connections, hook, gso, r, address)
}
-
-func filterMatch(filter IPHeaderFilter, hdr header.IPv4, hook Hook, nicName string) bool {
- // TODO(gvisor.dev/issue/170): Support other fields of the filter.
- // Check the transport protocol.
- if filter.Protocol != 0 && filter.Protocol != hdr.TransportProtocol() {
- return false
- }
-
- // Check the destination IP.
- dest := hdr.DestinationAddress()
- matches := true
- for i := range filter.Dst {
- if dest[i]&filter.DstMask[i] != filter.Dst[i] {
- matches = false
- break
- }
- }
- if matches == filter.DstInvert {
- return false
- }
-
- // Check the output interface.
- // TODO(gvisor.dev/issue/170): Add the check for FORWARD and POSTROUTING
- // hooks after supported.
- if hook == Output {
- n := len(filter.OutputInterface)
- if n == 0 {
- return true
- }
-
- // If the interface name ends with '+', any interface which begins
- // with the name should be matched.
- ifName := filter.OutputInterface
- matches = true
- if strings.HasSuffix(ifName, "+") {
- matches = strings.HasPrefix(nicName, ifName[:n-1])
- } else {
- matches = nicName == ifName
- }
- return filter.OutputInterfaceInvert != matches
- }
-
- return true
-}
diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go
index fe06007ae..4a6a5c6f1 100644
--- a/pkg/tcpip/stack/iptables_types.go
+++ b/pkg/tcpip/stack/iptables_types.go
@@ -15,7 +15,11 @@
package stack
import (
+ "strings"
+ "sync"
+
"gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
// A Hook specifies one of the hooks built into the network stack.
@@ -75,13 +79,17 @@ const (
// IPTables holds all the tables for a netstack.
type IPTables struct {
- // Tables maps table names to tables. User tables have arbitrary names.
- Tables map[string]Table
+ // mu protects tables and priorities.
+ mu sync.RWMutex
+
+ // tables maps table names to tables. User tables have arbitrary names. mu
+ // needs to be locked for accessing.
+ tables map[string]Table
- // Priorities maps each hook to a list of table names. The order of the
+ // priorities maps each hook to a list of table names. The order of the
// list is the order in which each table should be visited for that
- // hook.
- Priorities map[Hook][]string
+ // hook. mu needs to be locked for accessing.
+ priorities map[Hook][]string
connections ConnTrackTable
}
@@ -159,6 +167,16 @@ type IPHeaderFilter struct {
// comparison.
DstInvert bool
+ // Src matches the source IP address.
+ Src tcpip.Address
+
+ // SrcMask masks bits of the source IP address when comparing with Src.
+ SrcMask tcpip.Address
+
+ // SrcInvert inverts the meaning of the source IP check, i.e. when true the
+ // filter will match packets that fail the source comparison.
+ SrcInvert bool
+
// OutputInterface matches the name of the outgoing interface for the
// packet.
OutputInterface string
@@ -173,6 +191,55 @@ type IPHeaderFilter struct {
OutputInterfaceInvert bool
}
+// match returns whether hdr matches the filter.
+func (fl IPHeaderFilter) match(hdr header.IPv4, hook Hook, nicName string) bool {
+ // TODO(gvisor.dev/issue/170): Support other fields of the filter.
+ // Check the transport protocol.
+ if fl.Protocol != 0 && fl.Protocol != hdr.TransportProtocol() {
+ return false
+ }
+
+ // Check the source and destination IPs.
+ if !filterAddress(hdr.DestinationAddress(), fl.DstMask, fl.Dst, fl.DstInvert) || !filterAddress(hdr.SourceAddress(), fl.SrcMask, fl.Src, fl.SrcInvert) {
+ return false
+ }
+
+ // Check the output interface.
+ // TODO(gvisor.dev/issue/170): Add the check for FORWARD and POSTROUTING
+ // hooks after supported.
+ if hook == Output {
+ n := len(fl.OutputInterface)
+ if n == 0 {
+ return true
+ }
+
+ // If the interface name ends with '+', any interface which begins
+ // with the name should be matched.
+ ifName := fl.OutputInterface
+ matches := true
+ if strings.HasSuffix(ifName, "+") {
+ matches = strings.HasPrefix(nicName, ifName[:n-1])
+ } else {
+ matches = nicName == ifName
+ }
+ return fl.OutputInterfaceInvert != matches
+ }
+
+ return true
+}
+
+// filterAddress returns whether addr matches the filter.
+func filterAddress(addr, mask, filterAddr tcpip.Address, invert bool) bool {
+ matches := true
+ for i := range filterAddr {
+ if addr[i]&mask[i] != filterAddr[i] {
+ matches = false
+ break
+ }
+ }
+ return matches != invert
+}
+
// A Matcher is the interface for matching packets.
type Matcher interface {
// Name returns the name of the Matcher.
@@ -183,7 +250,7 @@ type Matcher interface {
// used for suspicious packets.
//
// Precondition: packet.NetworkHeader is set.
- Match(hook Hook, packet PacketBuffer, interfaceName string) (matches bool, hotdrop bool)
+ Match(hook Hook, packet *PacketBuffer, interfaceName string) (matches bool, hotdrop bool)
}
// A Target is the interface for taking an action for a packet.
diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go
index 526c7d6ff..ae7a8f740 100644
--- a/pkg/tcpip/stack/ndp.go
+++ b/pkg/tcpip/stack/ndp.go
@@ -750,7 +750,7 @@ func (ndp *ndpState) sendDADPacket(addr tcpip.Address) *tcpip.Error {
Protocol: header.ICMPv6ProtocolNumber,
TTL: header.NDPHopLimit,
TOS: DefaultTOS,
- }, PacketBuffer{Header: hdr},
+ }, &PacketBuffer{Header: hdr},
); err != nil {
sent.Dropped.Increment()
return err
@@ -1881,7 +1881,7 @@ func (ndp *ndpState) startSolicitingRouters() {
Protocol: header.ICMPv6ProtocolNumber,
TTL: header.NDPHopLimit,
TOS: DefaultTOS,
- }, PacketBuffer{Header: hdr},
+ }, &PacketBuffer{Header: hdr},
); err != nil {
sent.Dropped.Increment()
log.Printf("startSolicitingRouters: error writing NDP router solicit message on NIC(%d); err = %s", ndp.nic.ID(), err)
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index b3d174cdd..58f1ebf60 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -613,7 +613,7 @@ func TestDADFail(t *testing.T) {
// Receive a packet to simulate multiple nodes owning or
// attempting to own the same address.
hdr := test.makeBuf(addr1)
- e.InjectInbound(header.IPv6ProtocolNumber, stack.PacketBuffer{
+ e.InjectInbound(header.IPv6ProtocolNumber, &stack.PacketBuffer{
Data: hdr.View().ToVectorisedView(),
})
@@ -935,7 +935,7 @@ func TestSetNDPConfigurations(t *testing.T) {
// raBufWithOptsAndDHCPv6 returns a valid NDP Router Advertisement with options
// and DHCPv6 configurations specified.
-func raBufWithOptsAndDHCPv6(ip tcpip.Address, rl uint16, managedAddress, otherConfigurations bool, optSer header.NDPOptionsSerializer) stack.PacketBuffer {
+func raBufWithOptsAndDHCPv6(ip tcpip.Address, rl uint16, managedAddress, otherConfigurations bool, optSer header.NDPOptionsSerializer) *stack.PacketBuffer {
icmpSize := header.ICMPv6HeaderSize + header.NDPRAMinimumSize + int(optSer.Length())
hdr := buffer.NewPrependable(header.IPv6MinimumSize + icmpSize)
pkt := header.ICMPv6(hdr.Prepend(icmpSize))
@@ -970,14 +970,14 @@ func raBufWithOptsAndDHCPv6(ip tcpip.Address, rl uint16, managedAddress, otherCo
DstAddr: header.IPv6AllNodesMulticastAddress,
})
- return stack.PacketBuffer{Data: hdr.View().ToVectorisedView()}
+ return &stack.PacketBuffer{Data: hdr.View().ToVectorisedView()}
}
// raBufWithOpts returns a valid NDP Router Advertisement with options.
//
// Note, raBufWithOpts does not populate any of the RA fields other than the
// Router Lifetime.
-func raBufWithOpts(ip tcpip.Address, rl uint16, optSer header.NDPOptionsSerializer) stack.PacketBuffer {
+func raBufWithOpts(ip tcpip.Address, rl uint16, optSer header.NDPOptionsSerializer) *stack.PacketBuffer {
return raBufWithOptsAndDHCPv6(ip, rl, false, false, optSer)
}
@@ -986,7 +986,7 @@ func raBufWithOpts(ip tcpip.Address, rl uint16, optSer header.NDPOptionsSerializ
//
// Note, raBufWithDHCPv6 does not populate any of the RA fields other than the
// DHCPv6 related ones.
-func raBufWithDHCPv6(ip tcpip.Address, managedAddresses, otherConfiguratiosns bool) stack.PacketBuffer {
+func raBufWithDHCPv6(ip tcpip.Address, managedAddresses, otherConfiguratiosns bool) *stack.PacketBuffer {
return raBufWithOptsAndDHCPv6(ip, 0, managedAddresses, otherConfiguratiosns, header.NDPOptionsSerializer{})
}
@@ -994,7 +994,7 @@ func raBufWithDHCPv6(ip tcpip.Address, managedAddresses, otherConfiguratiosns bo
//
// Note, raBuf does not populate any of the RA fields other than the
// Router Lifetime.
-func raBuf(ip tcpip.Address, rl uint16) stack.PacketBuffer {
+func raBuf(ip tcpip.Address, rl uint16) *stack.PacketBuffer {
return raBufWithOpts(ip, rl, header.NDPOptionsSerializer{})
}
@@ -1003,7 +1003,7 @@ func raBuf(ip tcpip.Address, rl uint16) stack.PacketBuffer {
//
// Note, raBufWithPI does not populate any of the RA fields other than the
// Router Lifetime.
-func raBufWithPI(ip tcpip.Address, rl uint16, prefix tcpip.AddressWithPrefix, onLink, auto bool, vl, pl uint32) stack.PacketBuffer {
+func raBufWithPI(ip tcpip.Address, rl uint16, prefix tcpip.AddressWithPrefix, onLink, auto bool, vl, pl uint32) *stack.PacketBuffer {
flags := uint8(0)
if onLink {
// The OnLink flag is the 7th bit in the flags byte.
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 54103fdb3..ec8e3cb85 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -1153,7 +1153,7 @@ func (n *NIC) isInGroup(addr tcpip.Address) bool {
return joins != 0
}
-func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, pkt PacketBuffer) {
+func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, pkt *PacketBuffer) {
r := makeRoute(protocol, dst, src, localLinkAddr, ref, false /* handleLocal */, false /* multicastLoop */)
r.RemoteLinkAddress = remotelinkAddr
@@ -1167,7 +1167,7 @@ func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address,
// Note that the ownership of the slice backing vv is retained by the caller.
// This rule applies only to the slice itself, not to the items of the slice;
// the ownership of the items is not retained by the caller.
-func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) {
+func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
n.mu.RLock()
enabled := n.mu.enabled
// If the NIC is not yet enabled, don't receive any packets.
@@ -1233,14 +1233,14 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link
// iptables filtering.
ipt := n.stack.IPTables()
address := n.primaryAddress(protocol)
- if ok := ipt.Check(Prerouting, &pkt, nil, nil, address.Address, ""); !ok {
+ if ok := ipt.Check(Prerouting, pkt, nil, nil, address.Address, ""); !ok {
// iptables is telling us to drop the packet.
return
}
}
if ref := n.getRef(protocol, dst); ref != nil {
- handlePacket(protocol, dst, src, linkEP.LinkAddress(), remote, ref, pkt)
+ handlePacket(protocol, dst, src, n.linkEP.LinkAddress(), remote, ref, pkt)
return
}
@@ -1298,24 +1298,27 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link
}
}
-func (n *NIC) forwardPacket(r *Route, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) {
+func (n *NIC) forwardPacket(r *Route, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
// TODO(b/143425874) Decrease the TTL field in forwarded packets.
if linkHeaderLen := int(n.linkEP.MaxHeaderLength()); linkHeaderLen != 0 {
pkt.Header = buffer.NewPrependable(linkHeaderLen)
}
+ // WritePacket takes ownership of pkt, calculate numBytes first.
+ numBytes := pkt.Header.UsedLength() + pkt.Data.Size()
+
if err := n.linkEP.WritePacket(r, nil /* gso */, protocol, pkt); err != nil {
r.Stats().IP.OutgoingPacketErrors.Increment()
return
}
n.stats.Tx.Packets.Increment()
- n.stats.Tx.Bytes.IncrementBy(uint64(pkt.Header.UsedLength() + pkt.Data.Size()))
+ n.stats.Tx.Bytes.IncrementBy(uint64(numBytes))
}
// DeliverTransportPacket delivers the packets to the appropriate transport
// protocol endpoint.
-func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt PacketBuffer) {
+func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) {
state, ok := n.stack.transportProtocols[protocol]
if !ok {
n.stack.stats.UnknownProtocolRcvdPackets.Increment()
@@ -1362,7 +1365,7 @@ func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolN
// DeliverTransportControlPacket delivers control packets to the appropriate
// transport protocol endpoint.
-func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt PacketBuffer) {
+func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer) {
state, ok := n.stack.transportProtocols[trans]
if !ok {
return
diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go
index d672fc157..fea46158c 100644
--- a/pkg/tcpip/stack/nic_test.go
+++ b/pkg/tcpip/stack/nic_test.go
@@ -44,7 +44,7 @@ func TestDisabledRxStatsWhenNICDisabled(t *testing.T) {
t.FailNow()
}
- nic.DeliverNetworkPacket(nil, "", "", 0, PacketBuffer{Data: buffer.View([]byte{1, 2, 3, 4}).ToVectorisedView()})
+ nic.DeliverNetworkPacket("", "", 0, &PacketBuffer{Data: buffer.View([]byte{1, 2, 3, 4}).ToVectorisedView()})
if got := nic.stats.DisabledRx.Packets.Value(); got != 1 {
t.Errorf("got DisabledRx.Packets = %d, want = 1", got)
diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go
index 926df4d7b..1b5da6017 100644
--- a/pkg/tcpip/stack/packet_buffer.go
+++ b/pkg/tcpip/stack/packet_buffer.go
@@ -24,6 +24,8 @@ import (
// multiple endpoints. Clone() should be called in such cases so that
// modifications to the Data field do not affect other copies.
type PacketBuffer struct {
+ _ noCopy
+
// PacketBufferEntry is used to build an intrusive list of
// PacketBuffers.
PacketBufferEntry
@@ -82,7 +84,32 @@ type PacketBuffer struct {
// VectorisedView but does not deep copy the underlying bytes.
//
// Clone also does not deep copy any of its other fields.
-func (pk PacketBuffer) Clone() PacketBuffer {
- pk.Data = pk.Data.Clone(nil)
- return pk
+//
+// FIXME(b/153685824): Data gets copied but not other header references.
+func (pk *PacketBuffer) Clone() *PacketBuffer {
+ return &PacketBuffer{
+ PacketBufferEntry: pk.PacketBufferEntry,
+ Data: pk.Data.Clone(nil),
+ Header: pk.Header,
+ LinkHeader: pk.LinkHeader,
+ NetworkHeader: pk.NetworkHeader,
+ TransportHeader: pk.TransportHeader,
+ Hash: pk.Hash,
+ Owner: pk.Owner,
+ EgressRoute: pk.EgressRoute,
+ GSOOptions: pk.GSOOptions,
+ NetworkProtocolNumber: pk.NetworkProtocolNumber,
+ NatDone: pk.NatDone,
+ }
}
+
+// noCopy may be embedded into structs which must not be copied
+// after the first use.
+//
+// See https://golang.org/issues/8005#issuecomment-190753527
+// for details.
+type noCopy struct{}
+
+// Lock is a no-op used by -copylocks checker from `go vet`.
+func (*noCopy) Lock() {}
+func (*noCopy) Unlock() {}
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index b331427c6..94f177841 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -67,12 +67,12 @@ type TransportEndpoint interface {
// this transport endpoint. It sets pkt.TransportHeader.
//
// HandlePacket takes ownership of pkt.
- HandlePacket(r *Route, id TransportEndpointID, pkt PacketBuffer)
+ HandlePacket(r *Route, id TransportEndpointID, pkt *PacketBuffer)
// HandleControlPacket is called by the stack when new control (e.g.
// ICMP) packets arrive to this transport endpoint.
// HandleControlPacket takes ownership of pkt.
- HandleControlPacket(id TransportEndpointID, typ ControlType, extra uint32, pkt PacketBuffer)
+ HandleControlPacket(id TransportEndpointID, typ ControlType, extra uint32, pkt *PacketBuffer)
// Abort initiates an expedited endpoint teardown. It puts the endpoint
// in a closed state and frees all resources associated with it. This
@@ -100,7 +100,7 @@ type RawTransportEndpoint interface {
// layer up.
//
// HandlePacket takes ownership of pkt.
- HandlePacket(r *Route, pkt PacketBuffer)
+ HandlePacket(r *Route, pkt *PacketBuffer)
}
// PacketEndpoint is the interface that needs to be implemented by packet
@@ -118,7 +118,7 @@ type PacketEndpoint interface {
// should construct its own ethernet header for applications.
//
// HandlePacket takes ownership of pkt.
- HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt PacketBuffer)
+ HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
}
// TransportProtocol is the interface that needs to be implemented by transport
@@ -150,7 +150,7 @@ type TransportProtocol interface {
// stats purposes only).
//
// HandleUnknownDestinationPacket takes ownership of pkt.
- HandleUnknownDestinationPacket(r *Route, id TransportEndpointID, pkt PacketBuffer) bool
+ HandleUnknownDestinationPacket(r *Route, id TransportEndpointID, pkt *PacketBuffer) bool
// SetOption allows enabling/disabling protocol specific features.
// SetOption returns an error if the option is not supported or the
@@ -180,7 +180,7 @@ type TransportDispatcher interface {
// pkt.NetworkHeader must be set before calling DeliverTransportPacket.
//
// DeliverTransportPacket takes ownership of pkt.
- DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt PacketBuffer)
+ DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer)
// DeliverTransportControlPacket delivers control packets to the
// appropriate transport protocol endpoint.
@@ -189,7 +189,7 @@ type TransportDispatcher interface {
// DeliverTransportControlPacket.
//
// DeliverTransportControlPacket takes ownership of pkt.
- DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt PacketBuffer)
+ DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer)
}
// PacketLooping specifies where an outbound packet should be sent.
@@ -240,17 +240,18 @@ type NetworkEndpoint interface {
MaxHeaderLength() uint16
// WritePacket writes a packet to the given destination address and
- // protocol. It sets pkt.NetworkHeader. pkt.TransportHeader must have
- // already been set.
- WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt PacketBuffer) *tcpip.Error
+ // protocol. It takes ownership of pkt. pkt.TransportHeader must have already
+ // been set.
+ WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) *tcpip.Error
// WritePackets writes packets to the given destination address and
- // protocol. pkts must not be zero length.
+ // protocol. pkts must not be zero length. It takes ownership of pkts and
+ // underlying packets.
WritePackets(r *Route, gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, *tcpip.Error)
// WriteHeaderIncludedPacket writes a packet that includes a network
- // header to the given destination address.
- WriteHeaderIncludedPacket(r *Route, pkt PacketBuffer) *tcpip.Error
+ // header to the given destination address. It takes ownership of pkt.
+ WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) *tcpip.Error
// ID returns the network protocol endpoint ID.
ID() *NetworkEndpointID
@@ -265,7 +266,7 @@ type NetworkEndpoint interface {
// this network endpoint. It sets pkt.NetworkHeader.
//
// HandlePacket takes ownership of pkt.
- HandlePacket(r *Route, pkt PacketBuffer)
+ HandlePacket(r *Route, pkt *PacketBuffer)
// Close is called when the endpoint is reomved from a stack.
Close()
@@ -326,7 +327,7 @@ type NetworkDispatcher interface {
// packets sent via loopback), and won't have the field set.
//
// DeliverNetworkPacket takes ownership of pkt.
- DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer)
+ DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
}
// LinkEndpointCapabilities is the type associated with the capabilities
@@ -382,17 +383,17 @@ type LinkEndpoint interface {
LinkAddress() tcpip.LinkAddress
// WritePacket writes a packet with the given protocol through the
- // given route. It sets pkt.LinkHeader if a link layer header exists.
- // pkt.NetworkHeader and pkt.TransportHeader must have already been
- // set.
+ // given route. It takes ownership of pkt. pkt.NetworkHeader and
+ // pkt.TransportHeader must have already been set.
//
// To participate in transparent bridging, a LinkEndpoint implementation
// should call eth.Encode with header.EthernetFields.SrcAddr set to
// r.LocalLinkAddress if it is provided.
- WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) *tcpip.Error
+ WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) *tcpip.Error
// WritePackets writes packets with the given protocol through the
- // given route. pkts must not be zero length.
+ // given route. pkts must not be zero length. It takes ownership of pkts and
+ // underlying packets.
//
// Right now, WritePackets is used only when the software segmentation
// offload is enabled. If it will be used for something else, it may
@@ -400,7 +401,7 @@ type LinkEndpoint interface {
WritePackets(r *Route, gso *GSO, pkts PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error)
// WriteRawPacket writes a packet directly to the link. The packet
- // should already have an ethernet header.
+ // should already have an ethernet header. It takes ownership of vv.
WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error
// Attach attaches the data link layer endpoint to the network-layer
@@ -430,7 +431,7 @@ type InjectableLinkEndpoint interface {
LinkEndpoint
// InjectInbound injects an inbound packet.
- InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer)
+ InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
// InjectOutbound writes a fully formed outbound packet directly to the
// link.
diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go
index 150297ab9..f5b6ca0b9 100644
--- a/pkg/tcpip/stack/route.go
+++ b/pkg/tcpip/stack/route.go
@@ -153,17 +153,20 @@ func (r *Route) IsResolutionRequired() bool {
}
// WritePacket writes the packet through the given route.
-func (r *Route) WritePacket(gso *GSO, params NetworkHeaderParams, pkt PacketBuffer) *tcpip.Error {
+func (r *Route) WritePacket(gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) *tcpip.Error {
if !r.ref.isValidForOutgoing() {
return tcpip.ErrInvalidEndpointState
}
+ // WritePacket takes ownership of pkt, calculate numBytes first.
+ numBytes := pkt.Header.UsedLength() + pkt.Data.Size()
+
err := r.ref.ep.WritePacket(r, gso, params, pkt)
if err != nil {
r.Stats().IP.OutgoingPacketErrors.Increment()
} else {
r.ref.nic.stats.Tx.Packets.Increment()
- r.ref.nic.stats.Tx.Bytes.IncrementBy(uint64(pkt.Header.UsedLength() + pkt.Data.Size()))
+ r.ref.nic.stats.Tx.Bytes.IncrementBy(uint64(numBytes))
}
return err
}
@@ -175,9 +178,12 @@ func (r *Route) WritePackets(gso *GSO, pkts PacketBufferList, params NetworkHead
return 0, tcpip.ErrInvalidEndpointState
}
+ // WritePackets takes ownership of pkt, calculate length first.
+ numPkts := pkts.Len()
+
n, err := r.ref.ep.WritePackets(r, gso, pkts, params)
if err != nil {
- r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len() - n))
+ r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(numPkts - n))
}
r.ref.nic.stats.Tx.Packets.IncrementBy(uint64(n))
@@ -193,17 +199,20 @@ func (r *Route) WritePackets(gso *GSO, pkts PacketBufferList, params NetworkHead
// WriteHeaderIncludedPacket writes a packet already containing a network
// header through the given route.
-func (r *Route) WriteHeaderIncludedPacket(pkt PacketBuffer) *tcpip.Error {
+func (r *Route) WriteHeaderIncludedPacket(pkt *PacketBuffer) *tcpip.Error {
if !r.ref.isValidForOutgoing() {
return tcpip.ErrInvalidEndpointState
}
+ // WriteHeaderIncludedPacket takes ownership of pkt, calculate numBytes first.
+ numBytes := pkt.Data.Size()
+
if err := r.ref.ep.WriteHeaderIncludedPacket(r, pkt); err != nil {
r.Stats().IP.OutgoingPacketErrors.Increment()
return err
}
r.ref.nic.stats.Tx.Packets.Increment()
- r.ref.nic.stats.Tx.Bytes.IncrementBy(uint64(pkt.Data.Size()))
+ r.ref.nic.stats.Tx.Bytes.IncrementBy(uint64(numBytes))
return nil
}
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 0ab4c3e19..294ce8775 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -52,7 +52,7 @@ const (
type transportProtocolState struct {
proto TransportProtocol
- defaultHandler func(r *Route, id TransportEndpointID, pkt PacketBuffer) bool
+ defaultHandler func(r *Route, id TransportEndpointID, pkt *PacketBuffer) bool
}
// TCPProbeFunc is the expected function type for a TCP probe function to be
@@ -424,12 +424,8 @@ type Stack struct {
// handleLocal allows non-loopback interfaces to loop packets.
handleLocal bool
- // tablesMu protects iptables.
- tablesMu sync.RWMutex
-
- // tables are the iptables packet filtering and manipulation rules. The are
- // protected by tablesMu.`
- tables IPTables
+ // tables are the iptables packet filtering and manipulation rules.
+ tables *IPTables
// resumableEndpoints is a list of endpoints that need to be resumed if the
// stack is being restored.
@@ -676,6 +672,7 @@ func New(opts Options) *Stack {
clock: clock,
stats: opts.Stats.FillIn(),
handleLocal: opts.HandleLocal,
+ tables: DefaultTables(),
icmpRateLimiter: NewICMPRateLimiter(),
seed: generateRandUint32(),
ndpConfigs: opts.NDPConfigs,
@@ -778,7 +775,7 @@ func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber,
//
// It must be called only during initialization of the stack. Changing it as the
// stack is operating is not supported.
-func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(*Route, TransportEndpointID, PacketBuffer) bool) {
+func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(*Route, TransportEndpointID, *PacketBuffer) bool) {
state := s.transportProtocols[p]
if state != nil {
state.defaultHandler = h
@@ -1741,18 +1738,8 @@ func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool,
}
// IPTables returns the stack's iptables.
-func (s *Stack) IPTables() IPTables {
- s.tablesMu.RLock()
- t := s.tables
- s.tablesMu.RUnlock()
- return t
-}
-
-// SetIPTables sets the stack's iptables.
-func (s *Stack) SetIPTables(ipt IPTables) {
- s.tablesMu.Lock()
- s.tables = ipt
- s.tablesMu.Unlock()
+func (s *Stack) IPTables() *IPTables {
+ return s.tables
}
// ICMPLimit returns the maximum number of ICMP messages that can be sent
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 1a2cf007c..f6ddc3ced 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -90,7 +90,7 @@ func (f *fakeNetworkEndpoint) ID() *stack.NetworkEndpointID {
return &f.id
}
-func (f *fakeNetworkEndpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
+func (f *fakeNetworkEndpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
// Increment the received packet count in the protocol descriptor.
f.proto.packetCount[int(f.id.LocalAddress[0])%len(f.proto.packetCount)]++
@@ -132,7 +132,7 @@ func (f *fakeNetworkEndpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumbe
return f.proto.Number()
}
-func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt stack.PacketBuffer) *tcpip.Error {
+func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error {
// Increment the sent packet count in the protocol descriptor.
f.proto.sendPacketCount[int(r.RemoteAddress[0])%len(f.proto.sendPacketCount)]++
@@ -147,7 +147,7 @@ func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, params
views := make([]buffer.View, 1, 1+len(pkt.Data.Views()))
views[0] = pkt.Header.View()
views = append(views, pkt.Data.Views()...)
- f.HandlePacket(r, stack.PacketBuffer{
+ f.HandlePacket(r, &stack.PacketBuffer{
Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views),
})
}
@@ -163,7 +163,7 @@ func (f *fakeNetworkEndpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts
panic("not implemented")
}
-func (*fakeNetworkEndpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuffer) *tcpip.Error {
+func (*fakeNetworkEndpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error {
return tcpip.ErrNotSupported
}
@@ -293,7 +293,7 @@ func TestNetworkReceive(t *testing.T) {
// Make sure packet with wrong address is not delivered.
buf[0] = 3
- ep.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ ep.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeNet.packetCount[1] != 0 {
@@ -305,7 +305,7 @@ func TestNetworkReceive(t *testing.T) {
// Make sure packet is delivered to first endpoint.
buf[0] = 1
- ep.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ ep.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeNet.packetCount[1] != 1 {
@@ -317,7 +317,7 @@ func TestNetworkReceive(t *testing.T) {
// Make sure packet is delivered to second endpoint.
buf[0] = 2
- ep.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ ep.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeNet.packetCount[1] != 1 {
@@ -328,7 +328,7 @@ func TestNetworkReceive(t *testing.T) {
}
// Make sure packet is not delivered if protocol number is wrong.
- ep.InjectInbound(fakeNetNumber-1, stack.PacketBuffer{
+ ep.InjectInbound(fakeNetNumber-1, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeNet.packetCount[1] != 1 {
@@ -340,7 +340,7 @@ func TestNetworkReceive(t *testing.T) {
// Make sure packet that is too small is dropped.
buf.CapLength(2)
- ep.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ ep.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeNet.packetCount[1] != 1 {
@@ -362,7 +362,7 @@ func sendTo(s *stack.Stack, addr tcpip.Address, payload buffer.View) *tcpip.Erro
func send(r stack.Route, payload buffer.View) *tcpip.Error {
hdr := buffer.NewPrependable(int(r.MaxHeaderLength()))
- return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: fakeTransNumber, TTL: 123, TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: fakeTransNumber, TTL: 123, TOS: stack.DefaultTOS}, &stack.PacketBuffer{
Header: hdr,
Data: payload.ToVectorisedView(),
})
@@ -420,7 +420,7 @@ func testFailingRecv(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte b
func testRecvInternal(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, ep *channel.Endpoint, buf buffer.View, want int) {
t.Helper()
- ep.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ ep.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if got := fakeNet.PacketCount(localAddrByte); got != want {
@@ -2263,7 +2263,7 @@ func TestNICStats(t *testing.T) {
// Send a packet to address 1.
buf := buffer.NewView(30)
- ep1.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ ep1.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if got, want := s.NICInfo()[1].Stats.Rx.Packets.Value(), uint64(1); got != want {
@@ -2345,7 +2345,7 @@ func TestNICForwarding(t *testing.T) {
// Send a packet to dstAddr.
buf := buffer.NewView(30)
buf[0] = dstAddr[0]
- ep1.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ ep1.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go
index 9a33ed375..e09866405 100644
--- a/pkg/tcpip/stack/transport_demuxer.go
+++ b/pkg/tcpip/stack/transport_demuxer.go
@@ -152,7 +152,7 @@ func (epsByNIC *endpointsByNIC) transportEndpoints() []TransportEndpoint {
// HandlePacket is called by the stack when new packets arrive to this transport
// endpoint.
-func (epsByNIC *endpointsByNIC) handlePacket(r *Route, id TransportEndpointID, pkt PacketBuffer) {
+func (epsByNIC *endpointsByNIC) handlePacket(r *Route, id TransportEndpointID, pkt *PacketBuffer) {
epsByNIC.mu.RLock()
mpep, ok := epsByNIC.endpoints[r.ref.nic.ID()]
@@ -183,7 +183,7 @@ func (epsByNIC *endpointsByNIC) handlePacket(r *Route, id TransportEndpointID, p
}
// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket.
-func (epsByNIC *endpointsByNIC) handleControlPacket(n *NIC, id TransportEndpointID, typ ControlType, extra uint32, pkt PacketBuffer) {
+func (epsByNIC *endpointsByNIC) handleControlPacket(n *NIC, id TransportEndpointID, typ ControlType, extra uint32, pkt *PacketBuffer) {
epsByNIC.mu.RLock()
defer epsByNIC.mu.RUnlock()
@@ -251,7 +251,7 @@ type transportDemuxer struct {
// the dispatcher to delivery packets to the QueuePacket method instead of
// calling HandlePacket directly on the endpoint.
type queuedTransportProtocol interface {
- QueuePacket(r *Route, ep TransportEndpoint, id TransportEndpointID, pkt PacketBuffer)
+ QueuePacket(r *Route, ep TransportEndpoint, id TransportEndpointID, pkt *PacketBuffer)
}
func newTransportDemuxer(stack *Stack) *transportDemuxer {
@@ -379,7 +379,7 @@ func selectEndpoint(id TransportEndpointID, mpep *multiPortEndpoint, seed uint32
return mpep.endpoints[idx]
}
-func (ep *multiPortEndpoint) handlePacketAll(r *Route, id TransportEndpointID, pkt PacketBuffer) {
+func (ep *multiPortEndpoint) handlePacketAll(r *Route, id TransportEndpointID, pkt *PacketBuffer) {
ep.mu.RLock()
queuedProtocol, mustQueue := ep.demux.queuedProtocols[protocolIDs{ep.netProto, ep.transProto}]
// HandlePacket takes ownership of pkt, so each endpoint needs
@@ -470,7 +470,7 @@ func (d *transportDemuxer) unregisterEndpoint(netProtos []tcpip.NetworkProtocolN
// deliverPacket attempts to find one or more matching transport endpoints, and
// then, if matches are found, delivers the packet to them. Returns true if
// the packet no longer needs to be handled.
-func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt PacketBuffer, id TransportEndpointID) bool {
+func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer, id TransportEndpointID) bool {
eps, ok := d.protocol[protocolIDs{r.NetProto, protocol}]
if !ok {
return false
@@ -520,7 +520,7 @@ func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProto
// deliverRawPacket attempts to deliver the given packet and returns whether it
// was delivered successfully.
-func (d *transportDemuxer) deliverRawPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt PacketBuffer) bool {
+func (d *transportDemuxer) deliverRawPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) bool {
eps, ok := d.protocol[protocolIDs{r.NetProto, protocol}]
if !ok {
return false
@@ -544,7 +544,7 @@ func (d *transportDemuxer) deliverRawPacket(r *Route, protocol tcpip.TransportPr
// deliverControlPacket attempts to deliver the given control packet. Returns
// true if it found an endpoint, false otherwise.
-func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt PacketBuffer, id TransportEndpointID) bool {
+func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer, id TransportEndpointID) bool {
eps, ok := d.protocol[protocolIDs{net, trans}]
if !ok {
return false
diff --git a/pkg/tcpip/stack/transport_demuxer_test.go b/pkg/tcpip/stack/transport_demuxer_test.go
index 2474a7db3..67d778137 100644
--- a/pkg/tcpip/stack/transport_demuxer_test.go
+++ b/pkg/tcpip/stack/transport_demuxer_test.go
@@ -127,7 +127,7 @@ func (c *testContext) sendV4Packet(payload []byte, h *headers, linkEpID tcpip.NI
u.SetChecksum(^u.CalculateChecksum(xsum))
// Inject packet.
- c.linkEps[linkEpID].InjectInbound(ipv4.ProtocolNumber, stack.PacketBuffer{
+ c.linkEps[linkEpID].InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
NetworkHeader: buffer.View(ip),
TransportHeader: buffer.View(u),
@@ -165,7 +165,7 @@ func (c *testContext) sendV6Packet(payload []byte, h *headers, linkEpID tcpip.NI
u.SetChecksum(^u.CalculateChecksum(xsum))
// Inject packet.
- c.linkEps[linkEpID].InjectInbound(ipv6.ProtocolNumber, stack.PacketBuffer{
+ c.linkEps[linkEpID].InjectInbound(ipv6.ProtocolNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
NetworkHeader: buffer.View(ip),
TransportHeader: buffer.View(u),
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index a611e44ab..cb350ead3 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -88,7 +88,7 @@ func (f *fakeTransportEndpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions
if err != nil {
return 0, nil, err
}
- if err := f.route.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: fakeTransNumber, TTL: 123, TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ if err := f.route.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: fakeTransNumber, TTL: 123, TOS: stack.DefaultTOS}, &stack.PacketBuffer{
Header: hdr,
Data: buffer.View(v).ToVectorisedView(),
}); err != nil {
@@ -215,7 +215,7 @@ func (*fakeTransportEndpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Erro
return tcpip.FullAddress{}, nil
}
-func (f *fakeTransportEndpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, _ stack.PacketBuffer) {
+func (f *fakeTransportEndpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, _ *stack.PacketBuffer) {
// Increment the number of received packets.
f.proto.packetCount++
if f.acceptQueue != nil {
@@ -232,7 +232,7 @@ func (f *fakeTransportEndpoint) HandlePacket(r *stack.Route, id stack.TransportE
}
}
-func (f *fakeTransportEndpoint) HandleControlPacket(stack.TransportEndpointID, stack.ControlType, uint32, stack.PacketBuffer) {
+func (f *fakeTransportEndpoint) HandleControlPacket(stack.TransportEndpointID, stack.ControlType, uint32, *stack.PacketBuffer) {
// Increment the number of received control packets.
f.proto.controlCount++
}
@@ -289,7 +289,7 @@ func (*fakeTransportProtocol) ParsePorts(buffer.View) (src, dst uint16, err *tcp
return 0, 0, nil
}
-func (*fakeTransportProtocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, stack.PacketBuffer) bool {
+func (*fakeTransportProtocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, *stack.PacketBuffer) bool {
return true
}
@@ -369,7 +369,7 @@ func TestTransportReceive(t *testing.T) {
// Make sure packet with wrong protocol is not delivered.
buf[0] = 1
buf[2] = 0
- linkEP.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ linkEP.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeTrans.packetCount != 0 {
@@ -380,7 +380,7 @@ func TestTransportReceive(t *testing.T) {
buf[0] = 1
buf[1] = 3
buf[2] = byte(fakeTransNumber)
- linkEP.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ linkEP.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeTrans.packetCount != 0 {
@@ -391,7 +391,7 @@ func TestTransportReceive(t *testing.T) {
buf[0] = 1
buf[1] = 2
buf[2] = byte(fakeTransNumber)
- linkEP.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ linkEP.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeTrans.packetCount != 1 {
@@ -446,7 +446,7 @@ func TestTransportControlReceive(t *testing.T) {
buf[fakeNetHeaderLen+0] = 0
buf[fakeNetHeaderLen+1] = 1
buf[fakeNetHeaderLen+2] = 0
- linkEP.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ linkEP.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeTrans.controlCount != 0 {
@@ -457,7 +457,7 @@ func TestTransportControlReceive(t *testing.T) {
buf[fakeNetHeaderLen+0] = 3
buf[fakeNetHeaderLen+1] = 1
buf[fakeNetHeaderLen+2] = byte(fakeTransNumber)
- linkEP.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ linkEP.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeTrans.controlCount != 0 {
@@ -468,7 +468,7 @@ func TestTransportControlReceive(t *testing.T) {
buf[fakeNetHeaderLen+0] = 2
buf[fakeNetHeaderLen+1] = 1
buf[fakeNetHeaderLen+2] = byte(fakeTransNumber)
- linkEP.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ linkEP.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
if fakeTrans.controlCount != 1 {
@@ -623,7 +623,7 @@ func TestTransportForwarding(t *testing.T) {
req[0] = 1
req[1] = 3
req[2] = byte(fakeTransNumber)
- ep2.InjectInbound(fakeNetNumber, stack.PacketBuffer{
+ ep2.InjectInbound(fakeNetNumber, &stack.PacketBuffer{
Data: req.ToVectorisedView(),
})
diff --git a/pkg/tcpip/time_unsafe.go b/pkg/tcpip/time_unsafe.go
index 2f98a996f..7f172f978 100644
--- a/pkg/tcpip/time_unsafe.go
+++ b/pkg/tcpip/time_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.9
-// +build !go1.15
+// +build !go1.16
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index b1d820372..3bc72bc19 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -140,11 +140,6 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
e.owner = owner
}
-// IPTables implements tcpip.Endpoint.IPTables.
-func (e *endpoint) IPTables() (stack.IPTables, error) {
- return e.stack.IPTables(), nil
-}
-
// Read reads data from the endpoint. This method does not block if
// there is no data pending.
func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
@@ -450,7 +445,7 @@ func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8, owner tcpi
if ttl == 0 {
ttl = r.DefaultTTL()
}
- return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv4ProtocolNumber, TTL: ttl, TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv4ProtocolNumber, TTL: ttl, TOS: stack.DefaultTOS}, &stack.PacketBuffer{
Header: hdr,
Data: data.ToVectorisedView(),
TransportHeader: buffer.View(icmpv4),
@@ -481,7 +476,7 @@ func send6(r *stack.Route, ident uint16, data buffer.View, ttl uint8) *tcpip.Err
if ttl == 0 {
ttl = r.DefaultTTL()
}
- return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: ttl, TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: ttl, TOS: stack.DefaultTOS}, &stack.PacketBuffer{
Header: hdr,
Data: dataVV,
TransportHeader: buffer.View(icmpv6),
@@ -743,7 +738,7 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
// HandlePacket is called by the stack when new packets arrive to this transport
// endpoint.
-func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
// Only accept echo replies.
switch e.NetProto {
case header.IPv4ProtocolNumber:
@@ -805,7 +800,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
}
// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket.
-func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) {
+func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
}
// State implements tcpip.Endpoint.State. The ICMP endpoint currently doesn't
diff --git a/pkg/tcpip/transport/icmp/protocol.go b/pkg/tcpip/transport/icmp/protocol.go
index 3c47692b2..2ec6749c7 100644
--- a/pkg/tcpip/transport/icmp/protocol.go
+++ b/pkg/tcpip/transport/icmp/protocol.go
@@ -104,7 +104,7 @@ func (p *protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error)
// HandleUnknownDestinationPacket handles packets targeted at this protocol but
// that don't match any existing endpoint.
-func (*protocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, stack.PacketBuffer) bool {
+func (*protocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, *stack.PacketBuffer) bool {
return true
}
diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go
index 23158173d..baf08eda6 100644
--- a/pkg/tcpip/transport/packet/endpoint.go
+++ b/pkg/tcpip/transport/packet/endpoint.go
@@ -132,11 +132,6 @@ func (ep *endpoint) Close() {
// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf.
func (ep *endpoint) ModerateRecvBuf(copied int) {}
-// IPTables implements tcpip.Endpoint.IPTables.
-func (ep *endpoint) IPTables() (stack.IPTables, error) {
- return ep.stack.IPTables(), nil
-}
-
// Read implements tcpip.Endpoint.Read.
func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
ep.rcvMu.Lock()
@@ -298,7 +293,7 @@ func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
}
// HandlePacket implements stack.PacketEndpoint.HandlePacket.
-func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
ep.rcvMu.Lock()
// Drop the packet if our buffer is currently full.
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index eee754a5a..21c34fac2 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -166,11 +166,6 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
e.owner = owner
}
-// IPTables implements tcpip.Endpoint.IPTables.
-func (e *endpoint) IPTables() (stack.IPTables, error) {
- return e.stack.IPTables(), nil
-}
-
// Read implements tcpip.Endpoint.Read.
func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
if !e.associated {
@@ -348,7 +343,7 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64,
switch e.NetProto {
case header.IPv4ProtocolNumber:
if !e.associated {
- if err := route.WriteHeaderIncludedPacket(stack.PacketBuffer{
+ if err := route.WriteHeaderIncludedPacket(&stack.PacketBuffer{
Data: buffer.View(payloadBytes).ToVectorisedView(),
}); err != nil {
return 0, nil, err
@@ -357,7 +352,7 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64,
}
hdr := buffer.NewPrependable(len(payloadBytes) + int(route.MaxHeaderLength()))
- if err := route.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: e.TransProto, TTL: route.DefaultTTL(), TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ if err := route.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: e.TransProto, TTL: route.DefaultTTL(), TOS: stack.DefaultTOS}, &stack.PacketBuffer{
Header: hdr,
Data: buffer.View(payloadBytes).ToVectorisedView(),
Owner: e.owner,
@@ -584,7 +579,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
}
// HandlePacket implements stack.RawTransportEndpoint.HandlePacket.
-func (e *endpoint) HandlePacket(route *stack.Route, pkt stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
e.rcvMu.Lock()
// Drop the packet if our buffer is currently full.
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index a7e088d4e..7da93dcc4 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -833,13 +833,13 @@ func sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stac
return sendTCPBatch(r, tf, data, gso, owner)
}
- pkt := stack.PacketBuffer{
+ pkt := &stack.PacketBuffer{
Header: buffer.NewPrependable(header.TCPMinimumSize + int(r.MaxHeaderLength()) + optLen),
Data: data,
Hash: tf.txHash,
Owner: owner,
}
- buildTCPHdr(r, tf, &pkt, gso)
+ buildTCPHdr(r, tf, pkt, gso)
if tf.ttl == 0 {
tf.ttl = r.DefaultTTL()
@@ -1347,6 +1347,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
e.setEndpointState(StateError)
e.HardError = err
+ e.workerCleanup = true
// Lock released below.
epilogue()
return err
diff --git a/pkg/tcpip/transport/tcp/dispatcher.go b/pkg/tcpip/transport/tcp/dispatcher.go
index 6062ca916..047704c80 100644
--- a/pkg/tcpip/transport/tcp/dispatcher.go
+++ b/pkg/tcpip/transport/tcp/dispatcher.go
@@ -186,7 +186,7 @@ func (d *dispatcher) wait() {
}
}
-func (d *dispatcher) queuePacket(r *stack.Route, stackEP stack.TransportEndpoint, id stack.TransportEndpointID, pkt stack.PacketBuffer) {
+func (d *dispatcher) queuePacket(r *stack.Route, stackEP stack.TransportEndpoint, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
ep := stackEP.(*endpoint)
s := newSegment(r, id, pkt)
if !s.parse() {
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index b5ba972f1..edca98160 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -1172,11 +1172,6 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
e.owner = owner
}
-// IPTables implements tcpip.Endpoint.IPTables.
-func (e *endpoint) IPTables() (stack.IPTables, error) {
- return e.stack.IPTables(), nil
-}
-
// Read reads data from the endpoint.
func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
e.LockUser()
@@ -2462,7 +2457,7 @@ func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
}, nil
}
-func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
// TCP HandlePacket is not required anymore as inbound packets first
// land at the Dispatcher which then can either delivery using the
// worker go routine or directly do the invoke the tcp processing inline
@@ -2481,7 +2476,7 @@ func (e *endpoint) enqueueSegment(s *segment) bool {
}
// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket.
-func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) {
+func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
switch typ {
case stack.ControlPacketTooBig:
e.sndBufMu.Lock()
diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go
index 704d01c64..070b634b4 100644
--- a/pkg/tcpip/transport/tcp/forwarder.go
+++ b/pkg/tcpip/transport/tcp/forwarder.go
@@ -61,7 +61,7 @@ func NewForwarder(s *stack.Stack, rcvWnd, maxInFlight int, handler func(*Forward
//
// This function is expected to be passed as an argument to the
// stack.SetTransportProtocolHandler function.
-func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) bool {
+func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
s := newSegment(r, id, pkt)
defer s.decRef()
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index 2a2a7ddeb..c827d0277 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -206,7 +206,7 @@ func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) {
// to a specific processing queue. Each queue is serviced by its own processor
// goroutine which is responsible for dequeuing and doing full TCP dispatch of
// the packet.
-func (p *protocol) QueuePacket(r *stack.Route, ep stack.TransportEndpoint, id stack.TransportEndpointID, pkt stack.PacketBuffer) {
+func (p *protocol) QueuePacket(r *stack.Route, ep stack.TransportEndpoint, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
p.dispatcher.queuePacket(r, ep, id, pkt)
}
@@ -217,7 +217,7 @@ func (p *protocol) QueuePacket(r *stack.Route, ep stack.TransportEndpoint, id st
// a reset is sent in response to any incoming segment except another reset. In
// particular, SYNs addressed to a non-existent connection are rejected by this
// means."
-func (*protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) bool {
+func (*protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
s := newSegment(r, id, pkt)
defer s.decRef()
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index 074edded6..0c099e2fd 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -60,7 +60,7 @@ type segment struct {
xmitCount uint32
}
-func newSegment(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) *segment {
+func newSegment(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) *segment {
s := &segment{
refCnt: 1,
id: id,
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 06dc9b7d7..acacb42e4 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -618,6 +618,20 @@ func (s *sender) splitSeg(seg *segment, size int) {
nSeg.data.TrimFront(size)
nSeg.sequenceNumber.UpdateForward(seqnum.Size(size))
s.writeList.InsertAfter(seg, nSeg)
+
+ // The segment being split does not carry PUSH flag because it is
+ // followed by the newly split segment.
+ // RFC1122 section 4.2.2.2: MUST set the PSH bit in the last buffered
+ // segment (i.e., when there is no more queued data to be sent).
+ // Linux removes PSH flag only when the segment is being split over MSS
+ // and retains it when we are splitting the segment over lack of sender
+ // window space.
+ // ref: net/ipv4/tcp_output.c::tcp_write_xmit(), tcp_mss_split_point()
+ // ref: net/ipv4/tcp_output.c::tcp_write_wakeup(), tcp_snd_wnd_test()
+ if seg.data.Size() > s.maxPayloadSize {
+ seg.flags ^= header.TCPFlagPsh
+ }
+
seg.data.CapLength(size)
}
@@ -739,7 +753,7 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
if !s.isAssignedSequenceNumber(seg) {
// Merge segments if allowed.
if seg.data.Size() != 0 {
- available := int(seg.sequenceNumber.Size(end))
+ available := int(s.sndNxt.Size(end))
if available > limit {
available = limit
}
@@ -782,8 +796,11 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
// sent all at once.
return false
}
- if atomic.LoadUint32(&s.ep.cork) != 0 {
- // Hold back the segment until full.
+ // With TCP_CORK, hold back until minimum of the available
+ // send space and MSS.
+ // TODO(gvisor.dev/issue/2833): Drain the held segments after a
+ // timeout.
+ if seg.data.Size() < s.maxPayloadSize && atomic.LoadUint32(&s.ep.cork) != 0 {
return false
}
}
@@ -816,6 +833,25 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
panic("Netstack queues FIN segments without data.")
}
+ segEnd = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size()))
+ // If the entire segment cannot be accomodated in the receiver
+ // advertized window, skip splitting and sending of the segment.
+ // ref: net/ipv4/tcp_output.c::tcp_snd_wnd_test()
+ //
+ // Linux checks this for all segment transmits not triggered
+ // by a probe timer. On this condition, it defers the segment
+ // split and transmit to a short probe timer.
+ // ref: include/net/tcp.h::tcp_check_probe_timer()
+ // ref: net/ipv4/tcp_output.c::tcp_write_wakeup()
+ //
+ // Instead of defining a new transmit timer, we attempt to split the
+ // segment right here if there are no pending segments.
+ // If there are pending segments, segment transmits are deferred
+ // to the retransmit timer handler.
+ if s.sndUna != s.sndNxt && !segEnd.LessThan(end) {
+ return false
+ }
+
if !seg.sequenceNumber.LessThan(end) {
return false
}
@@ -824,9 +860,17 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
if available == 0 {
return false
}
+
+ // The segment size limit is computed as a function of sender congestion
+ // window and MSS. When sender congestion window is > 1, this limit can
+ // be larger than MSS. Ensure that the currently available send space
+ // is not greater than minimum of this limit and MSS.
if available > limit {
available = limit
}
+ if available > s.maxPayloadSize {
+ available = s.maxPayloadSize
+ }
if seg.data.Size() > available {
s.splitSeg(seg, available)
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 7b1d72cf4..9721f6caf 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -316,7 +316,7 @@ func (c *Context) SendICMPPacket(typ header.ICMPv4Type, code uint8, p1, p2 []byt
copy(icmp[header.ICMPv4PayloadOffset:], p2)
// Inject packet.
- c.linkEP.InjectInbound(ipv4.ProtocolNumber, stack.PacketBuffer{
+ c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
}
@@ -372,7 +372,7 @@ func (c *Context) BuildSegmentWithAddrs(payload []byte, h *Headers, src, dst tcp
// SendSegment sends a TCP segment that has already been built and written to a
// buffer.VectorisedView.
func (c *Context) SendSegment(s buffer.VectorisedView) {
- c.linkEP.InjectInbound(ipv4.ProtocolNumber, stack.PacketBuffer{
+ c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
Data: s,
})
}
@@ -380,7 +380,7 @@ func (c *Context) SendSegment(s buffer.VectorisedView) {
// SendPacket builds and sends a TCP segment(with the provided payload & TCP
// headers) in an IPv4 packet via the link layer endpoint.
func (c *Context) SendPacket(payload []byte, h *Headers) {
- c.linkEP.InjectInbound(ipv4.ProtocolNumber, stack.PacketBuffer{
+ c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
Data: c.BuildSegment(payload, h),
})
}
@@ -389,7 +389,7 @@ func (c *Context) SendPacket(payload []byte, h *Headers) {
// & TCPheaders) in an IPv4 packet via the link layer endpoint using the
// provided source and destination IPv4 addresses.
func (c *Context) SendPacketWithAddrs(payload []byte, h *Headers, src, dst tcpip.Address) {
- c.linkEP.InjectInbound(ipv4.ProtocolNumber, stack.PacketBuffer{
+ c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
Data: c.BuildSegmentWithAddrs(payload, h, src, dst),
})
}
@@ -564,7 +564,7 @@ func (c *Context) SendV6PacketWithAddrs(payload []byte, h *Headers, src, dst tcp
t.SetChecksum(^t.CalculateChecksum(xsum))
// Inject packet.
- c.linkEP.InjectInbound(ipv6.ProtocolNumber, stack.PacketBuffer{
+ c.linkEP.InjectInbound(ipv6.ProtocolNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
})
}
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 647b2067a..663af8fec 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -247,11 +247,6 @@ func (e *endpoint) Close() {
// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf.
func (e *endpoint) ModerateRecvBuf(copied int) {}
-// IPTables implements tcpip.Endpoint.IPTables.
-func (e *endpoint) IPTables() (stack.IPTables, error) {
- return e.stack.IPTables(), nil
-}
-
// Read reads data from the endpoint. This method does not block if
// there is no data pending.
func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
@@ -921,7 +916,11 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u
if useDefaultTTL {
ttl = r.DefaultTTL()
}
- if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: ProtocolNumber, TTL: ttl, TOS: tos}, stack.PacketBuffer{
+ if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{
+ Protocol: ProtocolNumber,
+ TTL: ttl,
+ TOS: tos,
+ }, &stack.PacketBuffer{
Header: hdr,
Data: data,
TransportHeader: buffer.View(udp),
@@ -1269,7 +1268,7 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
// HandlePacket is called by the stack when new packets arrive to this transport
// endpoint.
-func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
// Get the header then trim it from the view.
hdr, ok := pkt.Data.PullUp(header.UDPMinimumSize)
if !ok || int(header.UDP(hdr).Length()) > pkt.Data.Size() {
@@ -1336,7 +1335,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
}
// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket.
-func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) {
+func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
if typ == stack.ControlPortUnreachable {
e.mu.RLock()
defer e.mu.RUnlock()
diff --git a/pkg/tcpip/transport/udp/forwarder.go b/pkg/tcpip/transport/udp/forwarder.go
index a674ceb68..7abfa0ed2 100644
--- a/pkg/tcpip/transport/udp/forwarder.go
+++ b/pkg/tcpip/transport/udp/forwarder.go
@@ -43,7 +43,7 @@ func NewForwarder(s *stack.Stack, handler func(*ForwarderRequest)) *Forwarder {
//
// This function is expected to be passed as an argument to the
// stack.SetTransportProtocolHandler function.
-func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) bool {
+func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
f.handler(&ForwarderRequest{
stack: f.stack,
route: r,
@@ -61,7 +61,7 @@ type ForwarderRequest struct {
stack *stack.Stack
route *stack.Route
id stack.TransportEndpointID
- pkt stack.PacketBuffer
+ pkt *stack.PacketBuffer
}
// ID returns the 4-tuple (src address, src port, dst address, dst port) that
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index 52af6de22..e320c5758 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -66,7 +66,7 @@ func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) {
// HandleUnknownDestinationPacket handles packets targeted at this protocol but
// that don't match any existing endpoint.
-func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) bool {
+func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
// Get the header then trim it from the view.
h, ok := pkt.Data.PullUp(header.UDPMinimumSize)
if !ok {
@@ -140,7 +140,7 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
pkt.SetType(header.ICMPv4DstUnreachable)
pkt.SetCode(header.ICMPv4PortUnreachable)
pkt.SetChecksum(header.ICMPv4Checksum(pkt, payload))
- r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv4ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv4ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, &stack.PacketBuffer{
Header: hdr,
Data: payload,
})
@@ -177,7 +177,7 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
pkt.SetType(header.ICMPv6DstUnreachable)
pkt.SetCode(header.ICMPv6PortUnreachable)
pkt.SetChecksum(header.ICMPv6Checksum(pkt, r.LocalAddress, r.RemoteAddress, payload))
- r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, stack.PacketBuffer{
+ r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, &stack.PacketBuffer{
Header: hdr,
Data: payload,
})
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index 8acaa607a..e8ade882b 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -440,7 +440,7 @@ func (c *testContext) injectV6Packet(payload []byte, h *header4Tuple, valid bool
u.SetChecksum(^u.CalculateChecksum(xsum))
// Inject packet.
- c.linkEP.InjectInbound(ipv6.ProtocolNumber, stack.PacketBuffer{
+ c.linkEP.InjectInbound(ipv6.ProtocolNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
NetworkHeader: buffer.View(ip),
TransportHeader: buffer.View(u),
@@ -487,7 +487,7 @@ func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple, valid bool
// Inject packet.
- c.linkEP.InjectInbound(ipv4.ProtocolNumber, stack.PacketBuffer{
+ c.linkEP.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
Data: buf.ToVectorisedView(),
NetworkHeader: buffer.View(ip),
TransportHeader: buffer.View(u),
diff --git a/pkg/tmutex/BUILD b/pkg/tmutex/BUILD
deleted file mode 100644
index 2dcba84ae..000000000
--- a/pkg/tmutex/BUILD
+++ /dev/null
@@ -1,17 +0,0 @@
-load("//tools:defs.bzl", "go_library", "go_test")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "tmutex",
- srcs = ["tmutex.go"],
- visibility = ["//:sandbox"],
-)
-
-go_test(
- name = "tmutex_test",
- size = "medium",
- srcs = ["tmutex_test.go"],
- library = ":tmutex",
- deps = ["//pkg/sync"],
-)
diff --git a/pkg/tmutex/tmutex.go b/pkg/tmutex/tmutex.go
deleted file mode 100644
index c4685020d..000000000
--- a/pkg/tmutex/tmutex.go
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package tmutex provides the implementation of a mutex that implements an
-// efficient TryLock function in addition to Lock and Unlock.
-package tmutex
-
-import (
- "sync/atomic"
-)
-
-// Mutex is a mutual exclusion primitive that implements TryLock in addition
-// to Lock and Unlock.
-type Mutex struct {
- v int32
- ch chan struct{}
-}
-
-// Init initializes the mutex.
-func (m *Mutex) Init() {
- m.v = 1
- m.ch = make(chan struct{}, 1)
-}
-
-// Lock acquires the mutex. If it is currently held by another goroutine, Lock
-// will wait until it has a chance to acquire it.
-func (m *Mutex) Lock() {
- // Uncontended case.
- if atomic.AddInt32(&m.v, -1) == 0 {
- return
- }
-
- for {
- // Try to acquire the mutex again, at the same time making sure
- // that m.v is negative, which indicates to the owner of the
- // lock that it is contended, which will force it to try to wake
- // someone up when it releases the mutex.
- if v := atomic.LoadInt32(&m.v); v >= 0 && atomic.SwapInt32(&m.v, -1) == 1 {
- return
- }
-
- // Wait for the mutex to be released before trying again.
- <-m.ch
- }
-}
-
-// TryLock attempts to acquire the mutex without blocking. If the mutex is
-// currently held by another goroutine, it fails to acquire it and returns
-// false.
-func (m *Mutex) TryLock() bool {
- v := atomic.LoadInt32(&m.v)
- if v <= 0 {
- return false
- }
- return atomic.CompareAndSwapInt32(&m.v, 1, 0)
-}
-
-// Unlock releases the mutex.
-func (m *Mutex) Unlock() {
- if atomic.SwapInt32(&m.v, 1) == 0 {
- // There were no pending waiters.
- return
- }
-
- // Wake some waiter up.
- select {
- case m.ch <- struct{}{}:
- default:
- }
-}
diff --git a/pkg/tmutex/tmutex_test.go b/pkg/tmutex/tmutex_test.go
deleted file mode 100644
index 05540696a..000000000
--- a/pkg/tmutex/tmutex_test.go
+++ /dev/null
@@ -1,258 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tmutex
-
-import (
- "fmt"
- "runtime"
- "sync/atomic"
- "testing"
- "time"
-
- "gvisor.dev/gvisor/pkg/sync"
-)
-
-func TestBasicLock(t *testing.T) {
- var m Mutex
- m.Init()
-
- m.Lock()
-
- // Try blocking lock the mutex from a different goroutine. This must
- // not block because the mutex is held.
- ch := make(chan struct{}, 1)
- go func() {
- m.Lock()
- ch <- struct{}{}
- m.Unlock()
- ch <- struct{}{}
- }()
-
- select {
- case <-ch:
- t.Fatalf("Lock succeeded on locked mutex")
- case <-time.After(100 * time.Millisecond):
- }
-
- // Unlock the mutex and make sure that the goroutine waiting on Lock()
- // unblocks and succeeds.
- m.Unlock()
-
- select {
- case <-ch:
- case <-time.After(100 * time.Millisecond):
- t.Fatalf("Lock failed to acquire unlocked mutex")
- }
-
- // Make sure we can lock and unlock again.
- m.Lock()
- m.Unlock()
-}
-
-func TestTryLock(t *testing.T) {
- var m Mutex
- m.Init()
-
- // Try to lock. It should succeed.
- if !m.TryLock() {
- t.Fatalf("TryLock failed on unlocked mutex")
- }
-
- // Try to lock again, it should now fail.
- if m.TryLock() {
- t.Fatalf("TryLock succeeded on locked mutex")
- }
-
- // Try blocking lock the mutex from a different goroutine. This must
- // not block because the mutex is held.
- ch := make(chan struct{}, 1)
- go func() {
- m.Lock()
- ch <- struct{}{}
- m.Unlock()
- }()
-
- select {
- case <-ch:
- t.Fatalf("Lock succeeded on locked mutex")
- case <-time.After(100 * time.Millisecond):
- }
-
- // Unlock the mutex and make sure that the goroutine waiting on Lock()
- // unblocks and succeeds.
- m.Unlock()
-
- select {
- case <-ch:
- case <-time.After(100 * time.Millisecond):
- t.Fatalf("Lock failed to acquire unlocked mutex")
- }
-}
-
-func TestMutualExclusion(t *testing.T) {
- var m Mutex
- m.Init()
-
- // Test mutual exclusion by running "gr" goroutines concurrently, and
- // have each one increment a counter "iters" times within the critical
- // section established by the mutex.
- //
- // If at the end the counter is not gr * iters, then we know that
- // goroutines ran concurrently within the critical section.
- //
- // If one of the goroutines doesn't complete, it's likely a bug that
- // causes to it to wait forever.
- const gr = 1000
- const iters = 100000
- v := 0
- var wg sync.WaitGroup
- for i := 0; i < gr; i++ {
- wg.Add(1)
- go func() {
- for j := 0; j < iters; j++ {
- m.Lock()
- v++
- m.Unlock()
- }
- wg.Done()
- }()
- }
-
- wg.Wait()
-
- if v != gr*iters {
- t.Fatalf("Bad count: got %v, want %v", v, gr*iters)
- }
-}
-
-func TestMutualExclusionWithTryLock(t *testing.T) {
- var m Mutex
- m.Init()
-
- // Similar to the previous, with the addition of some goroutines that
- // only increment the count if TryLock succeeds.
- const gr = 1000
- const iters = 100000
- total := int64(gr * iters)
- var tryTotal int64
- v := int64(0)
- var wg sync.WaitGroup
- for i := 0; i < gr; i++ {
- wg.Add(2)
- go func() {
- for j := 0; j < iters; j++ {
- m.Lock()
- v++
- m.Unlock()
- }
- wg.Done()
- }()
- go func() {
- local := int64(0)
- for j := 0; j < iters; j++ {
- if m.TryLock() {
- v++
- m.Unlock()
- local++
- }
- }
- atomic.AddInt64(&tryTotal, local)
- wg.Done()
- }()
- }
-
- wg.Wait()
-
- t.Logf("tryTotal = %d", tryTotal)
- total += tryTotal
-
- if v != total {
- t.Fatalf("Bad count: got %v, want %v", v, total)
- }
-}
-
-// BenchmarkTmutex is equivalent to TestMutualExclusion, with the following
-// differences:
-//
-// - The number of goroutines is variable, with the maximum value depending on
-// GOMAXPROCS.
-//
-// - The number of iterations per benchmark is controlled by the benchmarking
-// framework.
-//
-// - Care is taken to ensure that all goroutines participating in the benchmark
-// have been created before the benchmark begins.
-func BenchmarkTmutex(b *testing.B) {
- for n, max := 1, 4*runtime.GOMAXPROCS(0); n > 0 && n <= max; n *= 2 {
- b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
- var m Mutex
- m.Init()
-
- var ready sync.WaitGroup
- begin := make(chan struct{})
- var end sync.WaitGroup
- for i := 0; i < n; i++ {
- ready.Add(1)
- end.Add(1)
- go func() {
- ready.Done()
- <-begin
- for j := 0; j < b.N; j++ {
- m.Lock()
- m.Unlock()
- }
- end.Done()
- }()
- }
-
- ready.Wait()
- b.ResetTimer()
- close(begin)
- end.Wait()
- })
- }
-}
-
-// BenchmarkSyncMutex is equivalent to BenchmarkTmutex, but uses sync.Mutex as
-// a comparison point.
-func BenchmarkSyncMutex(b *testing.B) {
- for n, max := 1, 4*runtime.GOMAXPROCS(0); n > 0 && n <= max; n *= 2 {
- b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
- var m sync.Mutex
-
- var ready sync.WaitGroup
- begin := make(chan struct{})
- var end sync.WaitGroup
- for i := 0; i < n; i++ {
- ready.Add(1)
- end.Add(1)
- go func() {
- ready.Done()
- <-begin
- for j := 0; j < b.N; j++ {
- m.Lock()
- m.Unlock()
- }
- end.Done()
- }()
- }
-
- ready.Wait()
- b.ResetTimer()
- close(begin)
- end.Wait()
- })
- }
-}
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 98cdd90dd..60e33425f 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -288,6 +288,14 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_SIGALTSTACK: {},
unix.SYS_STATX: {},
syscall.SYS_SYNC_FILE_RANGE: {},
+ syscall.SYS_TEE: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(1), /* len */
+ seccomp.AllowValue(unix.SPLICE_F_NONBLOCK), /* flags */
+ },
+ },
syscall.SYS_TGKILL: []seccomp.Rule{
{
seccomp.AllowValue(uint64(os.Getpid())),
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index e1181271a..b98a1eb50 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -37,6 +37,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
@@ -62,7 +63,7 @@ const (
)
// tmpfs has some extra supported options that we must pass through.
-var tmpfsAllowedOptions = []string{"mode", "uid", "gid"}
+var tmpfsAllowedData = []string{"mode", "uid", "gid"}
func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
// Upper layer uses the same flags as lower, but it must be read-write.
@@ -153,8 +154,8 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
return mounts
}
-// p9MountOptions creates a slice of options for a p9 mount.
-func p9MountOptions(fd int, fa FileAccessType, vfs2 bool) []string {
+// p9MountData creates a slice of p9 mount data.
+func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string {
opts := []string{
"trans=fd",
"rfdno=" + strconv.Itoa(fd),
@@ -221,9 +222,6 @@ func mountFlags(opts []string) fs.MountSourceFlags {
mf.NoAtime = true
case "noexec":
mf.NoExec = true
- case "bind", "rbind":
- // When options include either "bind" or "rbind",
- // it's converted to a 9P mount.
default:
log.Warningf("ignoring unknown mount option %q", o)
}
@@ -237,7 +235,7 @@ func isSupportedMountFlag(fstype, opt string) bool {
return true
}
if fstype == tmpfsvfs2.Name {
- ok, err := parseMountOption(opt, tmpfsAllowedOptions...)
+ ok, err := parseMountOption(opt, tmpfsAllowedData...)
return ok && err == nil
}
return false
@@ -294,17 +292,10 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter,
// Set namespace here so that it can be found in ctx.
procArgs.MountNamespace = mns
- return setExecutablePath(ctx, procArgs)
-}
-
-// setExecutablePath sets the procArgs.Filename by searching the PATH for an
-// executable matching the procArgs.Argv[0].
-func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
- paths := fs.GetPath(procArgs.Envv)
- exe := procArgs.Argv[0]
- f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths)
+ // Resolve the executable path from working dir and environment.
+ f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0])
if err != nil {
- return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err)
+ return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err)
}
procArgs.Filename = f
return nil
@@ -725,7 +716,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*
fd := c.fds.remove()
log.Infof("Mounting root over 9P, ioFD: %d", fd)
p9FS := mustFindFilesystem("9p")
- opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */)
+ opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */)
if conf.OverlayfsStaleRead {
// We can't check for overlayfs here because sandbox is chroot'ed and gofer
@@ -770,10 +761,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
useOverlay bool
)
- if isBindMount(m) {
- m.Type = bind
- }
-
switch m.Type {
case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name:
fsName = m.Type
@@ -783,7 +770,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
fsName = m.Type
var err error
- opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...)
+ opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...)
if err != nil {
return "", nil, false, err
}
@@ -791,7 +778,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
case bind:
fd := c.fds.remove()
fsName = gofervfs2.Name
- opts = p9MountOptions(fd, c.getMountAccessType(m), conf.VFS2)
+ opts = p9MountData(fd, c.getMountAccessType(m), conf.VFS2)
// If configured, add overlay to all writable mounts.
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
@@ -801,18 +788,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
return fsName, opts, useOverlay, nil
}
-func isBindMount(m specs.Mount) bool {
- for _, opt := range m.Options {
- // When options include either "bind" or "rbind", this behaves as
- // bind mount even if the mount type is equal to a filesystem supported
- // on runsc.
- if opt == "bind" || opt == "rbind" {
- return true
- }
- }
- return false
-}
-
func (c *containerMounter) getMountAccessType(mount specs.Mount) FileAccessType {
if hint := c.hints.findMount(mount); hint != nil {
return hint.fileAccessType()
@@ -956,7 +931,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
// Add root mount.
fd := c.fds.remove()
- opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */)
+ opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */)
mf := fs.MountSourceFlags{}
if c.root.Readonly || conf.Overlay {
@@ -1044,7 +1019,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M
Destination: "/tmp",
// Sticky bit is added to prevent accidental deletion of files from
// another user. This is normally done for /tmp.
- Options: []string{"mode=1777"},
+ Options: []string{"mode=01777"},
}
return c.mountSubmount(ctx, conf, mns, root, tmpMount)
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index f802bc9fb..002479612 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -1056,7 +1056,7 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in
return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
}
- s.FillDefaultIPTables()
+ s.FillIPTablesMetadata()
return &s, nil
}
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 147c901c4..6c84f0794 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -22,22 +22,21 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/devices/memdev"
- "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
- "gvisor.dev/gvisor/pkg/syserror"
-
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error {
@@ -95,69 +94,14 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte
return fmt.Errorf("failed to setupFS: %w", err)
}
procArgs.MountNamespaceVFS2 = mns
- return setExecutablePathVFS2(ctx, procArgs)
-}
-
-func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
- exe := procArgs.Argv[0]
-
- // Absolute paths can be used directly.
- if path.IsAbs(exe) {
- procArgs.Filename = exe
- return nil
- }
-
- // Paths with '/' in them should be joined to the working directory, or
- // to the root if working directory is not set.
- if strings.IndexByte(exe, '/') > 0 {
- if !path.IsAbs(procArgs.WorkingDirectory) {
- return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory)
- }
- procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe)
- return nil
- }
-
- // Paths with a '/' are relative to the CWD.
- if strings.IndexByte(exe, '/') > 0 {
- procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe)
- return nil
- }
- // Otherwise, We must lookup the name in the paths, starting from the
- // root directory.
- root := procArgs.MountNamespaceVFS2.Root()
- defer root.DecRef()
-
- paths := fs.GetPath(procArgs.Envv)
- creds := procArgs.Credentials
-
- for _, p := range paths {
- binPath := path.Join(p, exe)
- pop := &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(binPath),
- FollowFinalSymlink: true,
- }
- opts := &vfs.OpenOptions{
- FileExec: true,
- Flags: linux.O_RDONLY,
- }
- dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts)
- if err == syserror.ENOENT || err == syserror.EACCES {
- // Didn't find it here.
- continue
- }
- if err != nil {
- return err
- }
- dentry.DecRef()
-
- procArgs.Filename = binPath
- return nil
+ // Resolve the executable path from working dir and environment.
+ f, err := user.ResolveExecutablePathVFS2(ctx, procArgs.Credentials, procArgs.MountNamespaceVFS2, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0])
+ if err != nil {
+ return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err)
}
-
- return fmt.Errorf("executable %q not found in $PATH=%q", exe, strings.Join(paths, ":"))
+ procArgs.Filename = f
+ return nil
}
func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
@@ -192,7 +136,7 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs
func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
fd := c.fds.remove()
- opts := strings.Join(p9MountOptions(fd, conf.FileAccess, true /* vfs2 */), ",")
+ opts := strings.Join(p9MountData(fd, conf.FileAccess, true /* vfs2 */), ",")
log.Infof("Mounting root over 9P, ioFD: %d", fd)
mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts})
@@ -216,8 +160,9 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config,
}
}
- // TODO(gvisor.dev/issue/1487): implement mountTmp from fs.go.
-
+ if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil {
+ return fmt.Errorf(`mount submount "\tmp": %w`, err)
+ }
return nil
}
@@ -235,7 +180,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
fd := -1
// Only bind mounts use host FDs; see
// containerMounter.getMountNameAndOptionsVFS2.
- if m.Type == bind || isBindMount(m) {
+ if m.Type == bind {
fd = c.fds.remove()
}
mounts = append(mounts, mountAndFD{
@@ -255,8 +200,6 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
return mounts, nil
}
-// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1
-// version.
func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error {
root := mns.Root()
defer root.DecRef()
@@ -265,12 +208,11 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
Start: root,
Path: fspath.Parse(submount.Destination),
}
-
- fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, submount)
+ fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, submount)
if err != nil {
return fmt.Errorf("mountOptions failed: %w", err)
}
- if fsName == "" {
+ if len(fsName) == 0 {
// Filesystem is not supported (e.g. cgroup), just skip it.
return nil
}
@@ -278,17 +220,6 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
if err := c.makeSyntheticMount(ctx, submount.Destination, root, creds); err != nil {
return err
}
-
- opts := &vfs.MountOptions{
- GetFilesystemOptions: vfs.GetFilesystemOptions{
- Data: strings.Join(options, ","),
- },
- InternalMount: true,
- }
-
- // All writes go to upper, be paranoid and make lower readonly.
- opts.ReadOnly = useOverlay
-
if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil {
return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
}
@@ -298,17 +229,13 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
// getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values
// used for mounts.
-func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, []string, bool, error) {
+func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, *vfs.MountOptions, error) {
var (
- fsName string
- opts []string
- useOverlay bool
+ fsName string
+ data []string
)
- if isBindMount(m.Mount) {
- m.Type = bind
- }
-
+ // Find filesystem name and FS specific data field.
switch m.Type {
case devpts.Name, devtmpfs.Name, proc.Name, sys.Name:
fsName = m.Type
@@ -318,21 +245,46 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndF
fsName = m.Type
var err error
- opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...)
+ data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...)
if err != nil {
- return "", nil, false, err
+ return "", nil, err
}
case bind:
fsName = gofer.Name
- opts = p9MountOptions(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */)
- // If configured, add overlay to all writable mounts.
- useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
+ data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */)
default:
log.Warningf("ignoring unknown filesystem type %q", m.Type)
}
- return fsName, opts, useOverlay, nil
+
+ opts := &vfs.MountOptions{
+ GetFilesystemOptions: vfs.GetFilesystemOptions{
+ Data: strings.Join(data, ","),
+ },
+ InternalMount: true,
+ }
+
+ for _, o := range m.Options {
+ switch o {
+ case "rw":
+ opts.ReadOnly = false
+ case "ro":
+ opts.ReadOnly = true
+ case "noatime":
+ // TODO(gvisor.dev/issue/1193): Implement MS_NOATIME.
+ case "noexec":
+ opts.Flags.NoExec = true
+ default:
+ log.Warningf("ignoring unknown mount option %q", o)
+ }
+ }
+
+ if conf.Overlay {
+ // All writes go to upper, be paranoid and make lower readonly.
+ opts.ReadOnly = true
+ }
+ return fsName, opts, nil
}
func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error {
@@ -361,3 +313,63 @@ func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath s
}
return nil
}
+
+// mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so.
+// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
+// the host /tmp, but this is a nice optimization, and fixes some apps that call
+// mknod in /tmp. It's unsafe to mount tmpfs if:
+// 1. /tmp is mounted explicitly: we should not override user's wish
+// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp
+//
+// Note that when there are submounts inside of '/tmp', directories for the
+// mount points must be present, making '/tmp' not empty anymore.
+func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds *auth.Credentials, mns *vfs.MountNamespace) error {
+ for _, m := range c.mounts {
+ // m.Destination has been cleaned, so it's to use equality here.
+ if m.Destination == "/tmp" {
+ log.Debugf(`Explict "/tmp" mount found, skipping internal tmpfs, mount: %+v`, m)
+ return nil
+ }
+ }
+
+ root := mns.Root()
+ defer root.DecRef()
+ pop := vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse("/tmp"),
+ }
+ // TODO(gvisor.dev/issue/2782): Use O_PATH when available.
+ statx, err := c.k.VFS().StatAt(ctx, creds, &pop, &vfs.StatOptions{})
+ switch err {
+ case nil:
+ // Found '/tmp' in filesystem, check if it's empty.
+ if linux.FileMode(statx.Mode).FileType() != linux.ModeDirectory {
+ // Not a dir?! Leave it be.
+ return nil
+ }
+ if statx.Nlink > 2 {
+ // If more than "." and ".." is found, skip internal tmpfs to prevent
+ // hiding existing files.
+ log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`)
+ return nil
+ }
+ log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`)
+ fallthrough
+
+ case syserror.ENOENT:
+ // No '/tmp' found (or fallthrough from above). It's safe to mount internal
+ // tmpfs.
+ tmpMount := specs.Mount{
+ Type: tmpfs.Name,
+ Destination: "/tmp",
+ // Sticky bit is added to prevent accidental deletion of files from
+ // another user. This is normally done for /tmp.
+ Options: []string{"mode=01777"},
+ }
+ return c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount})
+
+ default:
+ return fmt.Errorf(`stating "/tmp" inside container: %w`, err)
+ }
+}
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD
index d4c7bdfbb..c087e1a3c 100644
--- a/runsc/cgroup/BUILD
+++ b/runsc/cgroup/BUILD
@@ -7,8 +7,8 @@ go_library(
srcs = ["cgroup.go"],
visibility = ["//:sandbox"],
deps = [
+ "//pkg/cleanup",
"//pkg/log",
- "//runsc/specutils",
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
],
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 19c8b0db6..ef01820ef 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -31,8 +31,8 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -246,7 +246,7 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error {
// The Cleanup object cleans up partially created cgroups when an error occurs.
// Errors occuring during cleanup itself are ignored.
- clean := specutils.MakeCleanup(func() { _ = c.Uninstall() })
+ clean := cleanup.Make(func() { _ = c.Uninstall() })
defer clean.Clean()
for key, cfg := range controllers {
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 4c2ac6ff0..01204ab4d 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -136,7 +136,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
// Ensure that if there is a panic, all goroutine stacks are printed.
- debug.SetTraceback("all")
+ debug.SetTraceback("system")
conf := args[0].(*boot.Config)
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 28f0d54b9..10448a759 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -168,7 +168,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Start with root mount, then add any other additional mount as needed.
ats := make([]p9.Attacher, 0, len(spec.Mounts)+1)
ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{
- ROMount: spec.Root.Readonly,
+ ROMount: spec.Root.Readonly || conf.Overlay,
PanicOnWrite: g.panicOnWrite,
})
if err != nil {
@@ -181,7 +181,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
for _, m := range spec.Mounts {
if specutils.Is9PMount(m) {
cfg := fsgofer.Config{
- ROMount: isReadonlyMount(m.Options),
+ ROMount: isReadonlyMount(m.Options) || conf.Overlay,
PanicOnWrite: g.panicOnWrite,
HostUDS: conf.FSGoferHostUDS,
}
diff --git a/runsc/cmd/spec.go b/runsc/cmd/spec.go
index 8e2b36e85..a2b0a4b14 100644
--- a/runsc/cmd/spec.go
+++ b/runsc/cmd/spec.go
@@ -16,6 +16,7 @@ package cmd
import (
"context"
+ "fmt"
"io/ioutil"
"os"
"path/filepath"
@@ -24,7 +25,8 @@ import (
"gvisor.dev/gvisor/runsc/flag"
)
-var specTemplate = []byte(`{
+func genSpec(cwd string) []byte {
+ var template = fmt.Sprintf(`{
"ociVersion": "1.0.0",
"process": {
"terminal": true,
@@ -39,7 +41,7 @@ var specTemplate = []byte(`{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm"
],
- "cwd": "/",
+ "cwd": "%s",
"capabilities": {
"bounding": [
"CAP_AUDIT_WRITE",
@@ -123,11 +125,15 @@ var specTemplate = []byte(`{
}
]
}
-}`)
+}`, cwd)
+
+ return []byte(template)
+}
// Spec implements subcommands.Command for the "spec" command.
type Spec struct {
bundle string
+ cwd string
}
// Name implements subcommands.Command.Name.
@@ -165,6 +171,8 @@ EXAMPLE:
// SetFlags implements subcommands.Command.SetFlags.
func (s *Spec) SetFlags(f *flag.FlagSet) {
f.StringVar(&s.bundle, "bundle", ".", "path to the root of the OCI bundle")
+ f.StringVar(&s.cwd, "cwd", "/", "working directory that will be set for the executable, "+
+ "this value MUST be an absolute path")
}
// Execute implements subcommands.Command.Execute.
@@ -174,7 +182,9 @@ func (s *Spec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Fatalf("file %q already exists", confPath)
}
- if err := ioutil.WriteFile(confPath, specTemplate, 0664); err != nil {
+ var spec = genSpec(s.cwd)
+
+ if err := ioutil.WriteFile(confPath, spec, 0664); err != nil {
Fatalf("writing to %q: %v", confPath, err)
}
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 46154df60..49cfb0837 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -16,6 +16,7 @@ go_library(
],
deps = [
"//pkg/abi/linux",
+ "//pkg/cleanup",
"//pkg/log",
"//pkg/sentry/control",
"//pkg/sentry/sighandling",
@@ -46,13 +47,14 @@ go_test(
"//test/cmd/test_app",
],
library = ":container",
- shard_count = 5,
+ shard_count = 10,
tags = [
"requires-kvm",
],
deps = [
"//pkg/abi/linux",
"//pkg/bits",
+ "//pkg/cleanup",
"//pkg/log",
"//pkg/sentry/control",
"//pkg/sentry/kernel",
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index 294dca5e7..3813c6b93 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -119,7 +119,7 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) {
// Test that an pty FD is sent over the console socket if one is provided.
func TestConsoleSocket(t *testing.T) {
- for name, conf := range configs(t, all...) {
+ for name, conf := range configsWithVFS2(t, all...) {
t.Run(name, func(t *testing.T) {
spec := testutil.NewSpecWithArgs("true")
_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 8539f252d..6d297d0df 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -31,6 +31,7 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/sighandling"
@@ -293,7 +294,7 @@ func New(conf *boot.Config, args Args) (*Container, error) {
}
// The Cleanup object cleans up partially created containers when an error
// occurs. Any errors occurring during cleanup itself are ignored.
- cu := specutils.MakeCleanup(func() { _ = c.Destroy() })
+ cu := cleanup.Make(func() { _ = c.Destroy() })
defer cu.Clean()
// Lock the container metadata file to prevent concurrent creations of
@@ -402,7 +403,7 @@ func (c *Container) Start(conf *boot.Config) error {
if err := c.Saver.lock(); err != nil {
return err
}
- unlock := specutils.MakeCleanup(func() { c.Saver.unlock() })
+ unlock := cleanup.Make(func() { c.Saver.unlock() })
defer unlock.Clean()
if err := c.requireStatus("start", Created); err != nil {
@@ -506,7 +507,7 @@ func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) {
}
// Clean up partially created container if an error occurs.
// Any errors returned by Destroy() itself are ignored.
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
c.Destroy()
})
defer cu.Clean()
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 1a6d50d0d..e7715b6f7 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -256,8 +256,6 @@ var (
func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
// Always load the default config.
cs := make(map[string]*boot.Config)
- cs["default"] = testutil.TestConfig(t)
-
for _, o := range opts {
switch o {
case overlay:
@@ -285,9 +283,16 @@ func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*boot.Config {
vfs1 := configs(t, opts...)
- vfs2 := configs(t, opts...)
- for key, value := range vfs2 {
+ var optsVFS2 []configOption
+ for _, opt := range opts {
+ // TODO(gvisor.dev/issue/1487): Enable overlay tests.
+ if opt != overlay {
+ optsVFS2 = append(optsVFS2, opt)
+ }
+ }
+
+ for key, value := range configs(t, optsVFS2...) {
value.VFS2 = true
vfs1[key+"VFS2"] = value
}
@@ -603,7 +608,7 @@ func doAppExitStatus(t *testing.T, vfs2 bool) {
// TestExec verifies that a container can exec a new program.
func TestExec(t *testing.T) {
- for name, conf := range configs(t, overlay) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
const uid = 343
spec := testutil.NewSpecWithArgs("sleep", "100")
@@ -695,7 +700,7 @@ func TestExec(t *testing.T) {
// TestKillPid verifies that we can signal individual exec'd processes.
func TestKillPid(t *testing.T) {
- for name, conf := range configs(t, overlay) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
app, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
@@ -1211,7 +1216,7 @@ func TestCapabilities(t *testing.T) {
uid := auth.KUID(os.Getuid() + 1)
gid := auth.KGID(os.Getgid() + 1)
- for name, conf := range configs(t, all...) {
+ for name, conf := range configsWithVFS2(t, all...) {
t.Run(name, func(t *testing.T) {
spec := testutil.NewSpecWithArgs("sleep", "100")
rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
@@ -1409,7 +1414,7 @@ func TestReadonlyRoot(t *testing.T) {
}
func TestUIDMap(t *testing.T) {
- for name, conf := range configs(t, noOverlay...) {
+ for name, conf := range configsWithVFS2(t, noOverlay...) {
t.Run(name, func(t *testing.T) {
testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount")
if err != nil {
@@ -1537,28 +1542,6 @@ func TestReadonlyMount(t *testing.T) {
}
}
-func TestBindMountByOption(t *testing.T) {
- for _, conf := range configs(t, overlay) {
- t.Logf("Running test with conf: %+v", conf)
-
- dir, err := ioutil.TempDir(testutil.TmpDir(), "bind-mount")
- spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
- if err != nil {
- t.Fatalf("ioutil.TempDir() failed: %v", err)
- }
- spec.Mounts = append(spec.Mounts, specs.Mount{
- Destination: dir,
- Source: dir,
- Type: "none",
- Options: []string{"rw", "bind"},
- })
-
- if err := run(spec, conf); err != nil {
- t.Fatalf("error running sandbox: %v", err)
- }
- }
-}
-
// TestAbbreviatedIDs checks that runsc supports using abbreviated container
// IDs in place of full IDs.
func TestAbbreviatedIDs(t *testing.T) {
@@ -1908,7 +1891,7 @@ func doDestroyStartingTest(t *testing.T, vfs2 bool) {
}
func TestCreateWorkingDir(t *testing.T) {
- for name, conf := range configs(t, overlay) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create")
if err != nil {
@@ -2031,7 +2014,7 @@ func TestMountPropagation(t *testing.T) {
}
func TestMountSymlink(t *testing.T) {
- for name, conf := range configs(t, overlay) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink")
if err != nil {
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index f6861b1dd..207206dd2 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -27,6 +27,7 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sync"
@@ -64,29 +65,16 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.")
}
- var (
- containers []*Container
- cleanups []func()
- )
- cleanups = append(cleanups, func() {
- for _, c := range containers {
- c.Destroy()
- }
- })
- cleanupAll := func() {
- for _, c := range cleanups {
- c()
- }
- }
- localClean := specutils.MakeCleanup(cleanupAll)
- defer localClean.Clean()
+ cu := cleanup.Cleanup{}
+ defer cu.Clean()
+ var containers []*Container
for i, spec := range specs {
bundleDir, cleanup, err := testutil.SetupBundleDir(spec)
if err != nil {
return nil, nil, fmt.Errorf("error setting up container: %v", err)
}
- cleanups = append(cleanups, cleanup)
+ cu.Add(cleanup)
args := Args{
ID: ids[i],
@@ -97,6 +85,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
if err != nil {
return nil, nil, fmt.Errorf("error creating container: %v", err)
}
+ cu.Add(func() { cont.Destroy() })
containers = append(containers, cont)
if err := cont.Start(conf); err != nil {
@@ -104,8 +93,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
}
}
- localClean.Release()
- return containers, cleanupAll, nil
+ return containers, cu.Release(), nil
}
type execDesc struct {
@@ -141,7 +129,7 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) {
// TestMultiContainerSanity checks that it is possible to run 2 dead-simple
// containers in the same sandbox.
func TestMultiContainerSanity(t *testing.T) {
- for name, conf := range configs(t, all...) {
+ for name, conf := range configsWithVFS2(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -1394,7 +1382,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
Destination: "/mydir/test",
Source: "/some/dir",
Type: "tmpfs",
- Options: []string{"rw", "relatime"},
+ Options: []string{"rw", "rbind", "relatime"},
}
podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD
index 64a406ae2..1036b0630 100644
--- a/runsc/fsgofer/BUILD
+++ b/runsc/fsgofer/BUILD
@@ -13,12 +13,12 @@ go_library(
visibility = ["//runsc:__subpackages__"],
deps = [
"//pkg/abi/linux",
+ "//pkg/cleanup",
"//pkg/fd",
"//pkg/log",
"//pkg/p9",
"//pkg/sync",
"//pkg/syserr",
- "//runsc/specutils",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 1942f50d7..edc239013 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -33,11 +33,11 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -439,7 +439,7 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid
if err != nil {
return nil, nil, p9.QID{}, 0, extractErrno(err)
}
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
child.Close()
// Best effort attempt to remove the file in case of failure.
if err := syscall.Unlinkat(l.file.FD(), name); err != nil {
@@ -480,7 +480,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID)
if err := syscall.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil {
return p9.QID{}, extractErrno(err)
}
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
// Best effort attempt to remove the dir in case of failure.
if err := unix.Unlinkat(l.file.FD(), name, unix.AT_REMOVEDIR); err != nil {
log.Warningf("error unlinking dir %q after failure: %v", path.Join(l.hostPath, name), err)
@@ -864,7 +864,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9.
if err := unix.Symlinkat(target, l.file.FD(), newName); err != nil {
return p9.QID{}, extractErrno(err)
}
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
// Best effort attempt to remove the symlink in case of failure.
if err := syscall.Unlinkat(l.file.FD(), newName); err != nil {
log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, newName), err)
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index c95d50294..035dcd3e3 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -13,6 +13,7 @@ go_library(
"//runsc:__subpackages__",
],
deps = [
+ "//pkg/cleanup",
"//pkg/control/client",
"//pkg/control/server",
"//pkg/log",
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index e4ec16e2f..6e1a2af25 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -30,6 +30,7 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/control/client"
"gvisor.dev/gvisor/pkg/control/server"
"gvisor.dev/gvisor/pkg/log"
@@ -119,7 +120,7 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) {
s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup}
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
- c := specutils.MakeCleanup(func() {
+ c := cleanup.Make(func() {
err := s.destroy()
log.Warningf("error destroying sandbox: %v", err)
})
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index 60bb7b7ee..23001d67c 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -18,6 +18,7 @@ import (
"fmt"
"os"
"os/exec"
+ "os/signal"
"path/filepath"
"runtime"
"syscall"
@@ -261,7 +262,18 @@ func MaybeRunAsRoot() error {
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
- if err := cmd.Run(); err != nil {
+ if err := cmd.Start(); err != nil {
+ return fmt.Errorf("re-executing self: %w", err)
+ }
+ ch := make(chan os.Signal, 1)
+ signal.Notify(ch)
+ go func() {
+ for {
+ // Forward all signals to child process.
+ cmd.Process.Signal(<-ch)
+ }
+ }()
+ if err := cmd.Wait(); err != nil {
if exit, ok := err.(*exec.ExitError); ok {
if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
os.Exit(ws.ExitStatus())
@@ -269,7 +281,7 @@ func MaybeRunAsRoot() error {
log.Warningf("No wait status provided, exiting with -1: %v", err)
os.Exit(-1)
}
- return fmt.Errorf("re-executing self: %v", err)
+ return err
}
// Child completed with success.
os.Exit(0)
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 202518b58..f1fa573c5 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -311,19 +311,7 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth.
// Is9PMount returns true if the given mount can be mounted as an external gofer.
func Is9PMount(m specs.Mount) bool {
- var isBind bool
- switch m.Type {
- case "bind":
- isBind = true
- default:
- for _, opt := range m.Options {
- if opt == "bind" || opt == "rbind" {
- isBind = true
- break
- }
- }
- }
- return isBind && m.Source != "" && IsSupportedDevMount(m)
+ return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m)
}
// IsSupportedDevMount returns true if the mount is a supported /dev mount.
@@ -456,36 +444,6 @@ func ContainsStr(strs []string, str string) bool {
return false
}
-// Cleanup allows defers to be aborted when cleanup needs to happen
-// conditionally. Usage:
-// c := MakeCleanup(func() { f.Close() })
-// defer c.Clean() // any failure before release is called will close the file.
-// ...
-// c.Release() // on success, aborts closing the file and return it.
-// return f
-type Cleanup struct {
- clean func()
-}
-
-// MakeCleanup creates a new Cleanup object.
-func MakeCleanup(f func()) Cleanup {
- return Cleanup{clean: f}
-}
-
-// Clean calls the cleanup function.
-func (c *Cleanup) Clean() {
- if c.clean != nil {
- c.clean()
- c.clean = nil
- }
-}
-
-// Release releases the cleanup from its duties, i.e. cleanup function is not
-// called after this point.
-func (c *Cleanup) Release() {
- c.clean = nil
-}
-
// RetryEintr retries the function until an error different than EINTR is
// returned.
func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {
diff --git a/scripts/build.sh b/scripts/build.sh
deleted file mode 100755
index e821e8624..000000000
--- a/scripts/build.sh
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/bin/bash
-
-# Copyright 2018 The gVisor Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-source $(dirname $0)/common.sh
-
-# Build runsc.
-runsc=$(build -c opt //runsc)
-
-# Build packages.
-pkgs=$(build -c opt //runsc:runsc-debian)
-
-# Stop here if we have no artifacts directory.
-[[ -v KOKORO_ARTIFACTS_DIR ]] || exit 0
-
-# install_raw installs raw artifacts.
-install_raw() {
- mkdir -p "$1"
- cp -f "${runsc}" "$1"/runsc
- sha512sum "$1"/runsc | awk '{print $1 " runsc"}' > "$1"/runsc.sha512
-}
-
-# Build a repository, if the key is available.
-#
-# Note that make_repository.sh script will install packages into the provided
-# root, but will output to stdout a directory that can be copied arbitrarily
-# into "${KOKORO_ARTIFACTS_DIR}"/dists/XXX. We do things this way because we
-# will copy the same repository structure into multiple locations, below.
-if [[ -v KOKORO_REPO_KEY ]]; then
- repo=$(tools/make_repository.sh \
- "${KOKORO_KEYSTORE_DIR}/${KOKORO_REPO_KEY}" \
- gvisor-bot@google.com \
- "${KOKORO_ARTIFACTS_DIR}" \
- ${pkgs})
-fi
-
-# install_repo installs a repository.
-#
-# Note that packages are already installed, as noted above.
-install_repo() {
- if [[ -v repo ]]; then
- rm -rf "$1" && mkdir -p "$(dirname "$1")" && cp -a "${repo}" "$1"
- fi
-}
-
-# If nightly, install only nightly artifacts.
-if [[ "${KOKORO_BUILD_NIGHTLY:-false}" == "true" ]]; then
- # The "latest" directory and current date.
- stamp="$(date -Idate)"
- install_raw "${KOKORO_ARTIFACTS_DIR}/nightly/latest"
- install_raw "${KOKORO_ARTIFACTS_DIR}/nightly/${stamp}"
- install_repo "${KOKORO_ARTIFACTS_DIR}/dists/nightly"
-else
- # Is it a tagged release? Build that.
- tags="$(git tag --points-at HEAD)"
- if ! [[ -z "${tags}" ]]; then
- # Note that a given commit can match any number of tags. We have to iterate
- # through all possible tags and produce associated artifacts.
- for tag in ${tags}; do
- name=$(echo "${tag}" | cut -d'-' -f2)
- base=$(echo "${name}" | cut -d'.' -f1)
- install_raw "${KOKORO_ARTIFACTS_DIR}/release/${name}"
- install_raw "${KOKORO_ARTIFACTS_DIR}/release/latest"
- install_repo "${KOKORO_ARTIFACTS_DIR}/dists/release"
- install_repo "${KOKORO_ARTIFACTS_DIR}/dists/${base}"
- done
- else
- # Otherwise, assume it is a raw master commit.
- install_raw "${KOKORO_ARTIFACTS_DIR}/master/latest"
- install_repo "${KOKORO_ARTIFACTS_DIR}/dists/master"
- fi
-fi
diff --git a/scripts/release.sh b/scripts/release.sh
deleted file mode 100755
index ac7eff3ef..000000000
--- a/scripts/release.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-
-# Copyright 2018 The gVisor Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-cd $(dirname $0)/..
-source scripts/common.sh
-
-# Tag a release only if provided.
-if ! [[ -v KOKORO_RELEASE_COMMIT ]]; then
- echo "No KOKORO_RELEASE_COMMIT provided." >&2
- exit 1
-fi
-if ! [[ -v KOKORO_RELEASE_TAG ]]; then
- echo "No KOKORO_RELEASE_TAG provided." >&2
- exit 1
-fi
-if ! [[ -v KOKORO_RELNOTES ]]; then
- echo "No KOKORO_RELNOTES provided." >&2
- exit 1
-fi
-if ! [[ -r "${KOKORO_ARTIFACTS_DIR}/${KOKORO_RELNOTES}" ]]; then
- echo "The file '${KOKORO_ARTIFACTS_DIR}/${KOKORO_RELNOTES}' is not readable." >&2
- exit 1
-fi
-
-# Unless an explicit releaser is provided, use the bot e-mail.
-declare -r KOKORO_RELEASE_AUTHOR=${KOKORO_RELEASE_AUTHOR:-gvisor-bot}
-declare -r EMAIL=${EMAIL:-${KOKORO_RELEASE_AUTHOR}@google.com}
-
-# Ensure we have an appropriate configuration for the tag.
-git config --get user.name || git config user.name "gVisor-bot"
-git config --get user.email || git config user.email "${EMAIL}"
-
-# Provide a credential if available.
-if [[ -v KOKORO_GITHUB_ACCESS_TOKEN ]]; then
- git config --global credential.helper cache
- git credential approve <<EOF
-protocol=https
-host=github.com
-username=$(cat "${KOKORO_KEYSTORE_DIR}/${KOKORO_GITHUB_ACCESS_TOKEN}")
-password=x-oauth-basic
-EOF
-fi
-
-# Run the release tool, which pushes to the origin repository.
-tools/tag_release.sh \
- "${KOKORO_RELEASE_COMMIT}" \
- "${KOKORO_RELEASE_TAG}" \
- "${KOKORO_ARTIFACTS_DIR}/${KOKORO_RELNOTES}"
diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go
index 41e0cfa8d..872021358 100644
--- a/test/iptables/filter_input.go
+++ b/test/iptables/filter_input.go
@@ -49,6 +49,8 @@ func init() {
RegisterTestCase(FilterInputJumpTwice{})
RegisterTestCase(FilterInputDestination{})
RegisterTestCase(FilterInputInvertDestination{})
+ RegisterTestCase(FilterInputSource{})
+ RegisterTestCase(FilterInputInvertSource{})
}
// FilterInputDropUDP tests that we can drop UDP traffic.
@@ -667,3 +669,61 @@ func (FilterInputInvertDestination) ContainerAction(ip net.IP) error {
func (FilterInputInvertDestination) LocalAction(ip net.IP) error {
return sendUDPLoop(ip, acceptPort, sendloopDuration)
}
+
+// FilterInputSource verifies that we can filter packets via `-s
+// <ipaddr>`.
+type FilterInputSource struct{}
+
+// Name implements TestCase.Name.
+func (FilterInputSource) Name() string {
+ return "FilterInputSource"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (FilterInputSource) ContainerAction(ip net.IP) error {
+ // Make INPUT's default action DROP, then ACCEPT all packets from this
+ // machine.
+ rules := [][]string{
+ {"-P", "INPUT", "DROP"},
+ {"-A", "INPUT", "-s", fmt.Sprintf("%v", ip), "-j", "ACCEPT"},
+ }
+ if err := filterTableRules(rules); err != nil {
+ return err
+ }
+
+ return listenUDP(acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (FilterInputSource) LocalAction(ip net.IP) error {
+ return sendUDPLoop(ip, acceptPort, sendloopDuration)
+}
+
+// FilterInputInvertSource verifies that we can filter packets via `! -s
+// <ipaddr>`.
+type FilterInputInvertSource struct{}
+
+// Name implements TestCase.Name.
+func (FilterInputInvertSource) Name() string {
+ return "FilterInputInvertSource"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (FilterInputInvertSource) ContainerAction(ip net.IP) error {
+ // Make INPUT's default action DROP, then ACCEPT all packets not bound
+ // for 127.0.0.1.
+ rules := [][]string{
+ {"-P", "INPUT", "DROP"},
+ {"-A", "INPUT", "!", "-s", localIP, "-j", "ACCEPT"},
+ }
+ if err := filterTableRules(rules); err != nil {
+ return err
+ }
+
+ return listenUDP(acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (FilterInputInvertSource) LocalAction(ip net.IP) error {
+ return sendUDPLoop(ip, acceptPort, sendloopDuration)
+}
diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go
index 4fd2cb46a..172ad9e16 100644
--- a/test/iptables/iptables_test.go
+++ b/test/iptables/iptables_test.go
@@ -302,3 +302,11 @@ func TestNATPreRedirectInvert(t *testing.T) {
func TestNATRedirectRequiresProtocol(t *testing.T) {
singleTest(t, NATRedirectRequiresProtocol{})
}
+
+func TestInputSource(t *testing.T) {
+ singleTest(t, FilterInputSource{})
+}
+
+func TestInputInvertSource(t *testing.T) {
+ singleTest(t, FilterInputInvertSource{})
+}
diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc
index dc3024f44..a1a5c3612 100644
--- a/test/packetimpact/dut/posix_server.cc
+++ b/test/packetimpact/dut/posix_server.cc
@@ -61,7 +61,7 @@
}
::grpc::Status proto_to_sockaddr(const posix_server::Sockaddr &sockaddr_proto,
- sockaddr_storage *addr) {
+ sockaddr_storage *addr, socklen_t *addr_len) {
switch (sockaddr_proto.sockaddr_case()) {
case posix_server::Sockaddr::SockaddrCase::kIn: {
auto proto_in = sockaddr_proto.in();
@@ -74,6 +74,7 @@
addr_in->sin_port = htons(proto_in.port());
proto_in.addr().copy(reinterpret_cast<char *>(&addr_in->sin_addr.s_addr),
4);
+ *addr_len = sizeof(*addr_in);
break;
}
case posix_server::Sockaddr::SockaddrCase::kIn6: {
@@ -89,6 +90,7 @@
proto_in6.addr().copy(
reinterpret_cast<char *>(&addr_in6->sin6_addr.s6_addr), 16);
addr_in6->sin6_scope_id = htonl(proto_in6.scope_id());
+ *addr_len = sizeof(*addr_in6);
break;
}
case posix_server::Sockaddr::SockaddrCase::SOCKADDR_NOT_SET:
@@ -120,13 +122,14 @@ class PosixImpl final : public posix_server::Posix::Service {
}
sockaddr_storage addr;
- auto err = proto_to_sockaddr(request->addr(), &addr);
+ socklen_t addr_len;
+ auto err = proto_to_sockaddr(request->addr(), &addr, &addr_len);
if (!err.ok()) {
return err;
}
- response->set_ret(bind(request->sockfd(),
- reinterpret_cast<sockaddr *>(&addr), sizeof(addr)));
+ response->set_ret(
+ bind(request->sockfd(), reinterpret_cast<sockaddr *>(&addr), addr_len));
response->set_errno_(errno);
return ::grpc::Status::OK;
}
@@ -147,13 +150,22 @@ class PosixImpl final : public posix_server::Posix::Service {
"Missing address");
}
sockaddr_storage addr;
- auto err = proto_to_sockaddr(request->addr(), &addr);
+ socklen_t addr_len;
+ auto err = proto_to_sockaddr(request->addr(), &addr, &addr_len);
if (!err.ok()) {
return err;
}
- response->set_ret(connect(
- request->sockfd(), reinterpret_cast<sockaddr *>(&addr), sizeof(addr)));
+ response->set_ret(connect(request->sockfd(),
+ reinterpret_cast<sockaddr *>(&addr), addr_len));
+ response->set_errno_(errno);
+ return ::grpc::Status::OK;
+ }
+
+ ::grpc::Status Fcntl(grpc_impl::ServerContext *context,
+ const ::posix_server::FcntlRequest *request,
+ ::posix_server::FcntlResponse *response) override {
+ response->set_ret(::fcntl(request->fd(), request->cmd(), request->arg()));
response->set_errno_(errno);
return ::grpc::Status::OK;
}
@@ -237,14 +249,15 @@ class PosixImpl final : public posix_server::Posix::Service {
"Missing address");
}
sockaddr_storage addr;
- auto err = proto_to_sockaddr(request->dest_addr(), &addr);
+ socklen_t addr_len;
+ auto err = proto_to_sockaddr(request->dest_addr(), &addr, &addr_len);
if (!err.ok()) {
return err;
}
- response->set_ret(::sendto(
- request->sockfd(), request->buf().data(), request->buf().size(),
- request->flags(), reinterpret_cast<sockaddr *>(&addr), sizeof(addr)));
+ response->set_ret(::sendto(request->sockfd(), request->buf().data(),
+ request->buf().size(), request->flags(),
+ reinterpret_cast<sockaddr *>(&addr), addr_len));
response->set_errno_(errno);
return ::grpc::Status::OK;
}
diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto
index 9dca563f1..77da0fb3a 100644
--- a/test/packetimpact/proto/posix_server.proto
+++ b/test/packetimpact/proto/posix_server.proto
@@ -91,6 +91,17 @@ message ConnectResponse {
int32 errno_ = 2; // "errno" may fail to compile in c++.
}
+message FcntlRequest {
+ int32 fd = 1;
+ int32 cmd = 2;
+ int32 arg = 3;
+}
+
+message FcntlResponse {
+ int32 ret = 1;
+ int32 errno_ = 2;
+}
+
message GetSockNameRequest {
int32 sockfd = 1;
}
@@ -198,6 +209,8 @@ service Posix {
rpc Close(CloseRequest) returns (CloseResponse);
// Call connect() on the DUT.
rpc Connect(ConnectRequest) returns (ConnectResponse);
+ // Call fcntl() on the DUT.
+ rpc Fcntl(FcntlRequest) returns (FcntlResponse);
// Call getsockname() on the DUT.
rpc GetSockName(GetSockNameRequest) returns (GetSockNameResponse);
// Call getsockopt() on the DUT.
diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go
index ac13c8543..e58a1fb1b 100644
--- a/test/packetimpact/runner/packetimpact_test.go
+++ b/test/packetimpact/runner/packetimpact_test.go
@@ -18,9 +18,12 @@ package packetimpact_test
import (
"flag"
"fmt"
+ "io/ioutil"
"log"
"math/rand"
"net"
+ "os"
+ "os/exec"
"path"
"strings"
"testing"
@@ -117,10 +120,18 @@ func TestOne(t *testing.T) {
}(dn)
}
+ tmpDir, err := ioutil.TempDir("", "container-output")
+ if err != nil {
+ t.Fatal("creating temp dir:", err)
+ }
+ defer os.RemoveAll(tmpDir)
+
+ const testOutputDir = "/tmp/testoutput"
+
runOpts := dockerutil.RunOpts{
Image: "packetimpact",
CapAdd: []string{"NET_ADMIN"},
- Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm"},
+ Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm", "-v", tmpDir + ":" + testOutputDir},
Foreground: true,
}
@@ -187,7 +198,10 @@ func TestOne(t *testing.T) {
// Run tcpdump in the test bench unbuffered, without DNS resolution, just on
// the interface with the test packets.
snifferArgs := []string{
- "tcpdump", "-S", "-vvv", "-U", "-n", "-i", testNetDev,
+ "tcpdump",
+ "-S", "-vvv", "-U", "-n",
+ "-i", testNetDev,
+ "-w", testOutputDir + "/dump.pcap",
}
snifferRegex := "tcpdump: listening.*\n"
if *tshark {
@@ -201,6 +215,12 @@ func TestOne(t *testing.T) {
snifferRegex = "Capturing on.*\n"
}
+ defer func() {
+ if err := exec.Command("/bin/cp", "-r", tmpDir, os.Getenv("TEST_UNDECLARED_OUTPUTS_DIR")).Run(); err != nil {
+ t.Error("unable to copy container output files:", err)
+ }
+ }()
+
if err := testbench.Create(runOpts, snifferArgs...); err != nil {
t.Fatalf("unable to create container %s: %s", testbench.Name, err)
}
diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go
index bf104e5ca..6e85d6fab 100644
--- a/test/packetimpact/testbench/connections.go
+++ b/test/packetimpact/testbench/connections.go
@@ -266,14 +266,14 @@ func SeqNumValue(v seqnum.Value) *seqnum.Value {
}
// newTCPState creates a new TCPState.
-func newTCPState(domain int, out, in TCP) (*tcpState, error) {
+func newTCPState(domain int, out, in TCP) (*tcpState, unix.Sockaddr, error) {
portPickerFD, localAddr, err := pickPort(domain, unix.SOCK_STREAM)
if err != nil {
- return nil, err
+ return nil, nil, err
}
localPort, err := portFromSockaddr(localAddr)
if err != nil {
- return nil, err
+ return nil, nil, err
}
s := tcpState{
out: TCP{SrcPort: &localPort},
@@ -283,12 +283,12 @@ func newTCPState(domain int, out, in TCP) (*tcpState, error) {
finSent: false,
}
if err := s.out.merge(&out); err != nil {
- return nil, err
+ return nil, nil, err
}
if err := s.in.merge(&in); err != nil {
- return nil, err
+ return nil, nil, err
}
- return &s, nil
+ return &s, localAddr, nil
}
func (s *tcpState) outgoing() Layer {
@@ -606,7 +606,7 @@ func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 {
if err != nil {
t.Fatalf("can't make ipv4State: %s", err)
}
- tcpState, err := newTCPState(unix.AF_INET, outgoingTCP, incomingTCP)
+ tcpState, localAddr, err := newTCPState(unix.AF_INET, outgoingTCP, incomingTCP)
if err != nil {
t.Fatalf("can't make tcpState: %s", err)
}
@@ -623,19 +623,41 @@ func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 {
layerStates: []layerState{etherState, ipv4State, tcpState},
injector: injector,
sniffer: sniffer,
+ localAddr: localAddr,
t: t,
}
}
-// Handshake performs a TCP 3-way handshake. The input Connection should have a
+// Connect performs a TCP 3-way handshake. The input Connection should have a
// final TCP Layer.
-func (conn *TCPIPv4) Handshake() {
+func (conn *TCPIPv4) Connect() {
+ conn.t.Helper()
+
// Send the SYN.
conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn)})
// Wait for the SYN-ACK.
synAck, err := conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
- if synAck == nil {
+ if err != nil {
+ conn.t.Fatalf("didn't get synack during handshake: %s", err)
+ }
+ conn.layerStates[len(conn.layerStates)-1].(*tcpState).synAck = synAck
+
+ // Send an ACK.
+ conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)})
+}
+
+// ConnectWithOptions performs a TCP 3-way handshake with given TCP options.
+// The input Connection should have a final TCP Layer.
+func (conn *TCPIPv4) ConnectWithOptions(options []byte) {
+ conn.t.Helper()
+
+ // Send the SYN.
+ conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn), Options: options})
+
+ // Wait for the SYN-ACK.
+ synAck, err := conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
+ if err != nil {
conn.t.Fatalf("didn't get synack during handshake: %s", err)
}
conn.layerStates[len(conn.layerStates)-1].(*tcpState).synAck = synAck
@@ -655,6 +677,31 @@ func (conn *TCPIPv4) ExpectData(tcp *TCP, payload *Payload, timeout time.Duratio
return (*Connection)(conn).ExpectFrame(expected, timeout)
}
+// ExpectNextData attempts to receive the next incoming segment for the
+// connection and expects that to match the given layers.
+//
+// It differs from ExpectData() in that here we are only interested in the next
+// received segment, while ExpectData() can receive multiple segments for the
+// connection until there is a match with given layers or a timeout.
+func (conn *TCPIPv4) ExpectNextData(tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) {
+ // Receive the first incoming TCP segment for this connection.
+ got, err := conn.ExpectData(&TCP{}, nil, timeout)
+ if err != nil {
+ return nil, err
+ }
+
+ expected := make([]Layer, len(conn.layerStates))
+ expected[len(expected)-1] = tcp
+ if payload != nil {
+ expected = append(expected, payload)
+ tcp.SeqNum = Uint32(uint32(*conn.RemoteSeqNum()) - uint32(payload.Length()))
+ }
+ if !(*Connection)(conn).match(expected, got) {
+ return nil, fmt.Errorf("next frame is not matching %s during %s: got %s", expected, timeout, got)
+ }
+ return got, nil
+}
+
// Send a packet with reasonable defaults. Potentially override the TCP layer in
// the connection with the provided layer and add additionLayers.
func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) {
@@ -703,6 +750,11 @@ func (conn *TCPIPv4) SynAck() *TCP {
return conn.state().synAck
}
+// LocalAddr gets the local socket address of this connection.
+func (conn *TCPIPv4) LocalAddr() unix.Sockaddr {
+ return conn.localAddr
+}
+
// IPv6Conn maintains the state for all the layers in a IPv6 connection.
type IPv6Conn Connection
diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go
index b919a3c2e..2a2afecb5 100644
--- a/test/packetimpact/testbench/dut.go
+++ b/test/packetimpact/testbench/dut.go
@@ -241,7 +241,9 @@ func (dut *DUT) Connect(fd int32, sa unix.Sockaddr) {
ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
defer cancel()
ret, err := dut.ConnectWithErrno(ctx, fd, sa)
- if ret != 0 {
+ // Ignore 'operation in progress' error that can be returned when the socket
+ // is non-blocking.
+ if err != syscall.Errno(unix.EINPROGRESS) && ret != 0 {
dut.t.Fatalf("failed to connect socket: %s", err)
}
}
@@ -260,6 +262,35 @@ func (dut *DUT) ConnectWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr
return resp.GetRet(), syscall.Errno(resp.GetErrno_())
}
+// Fcntl calls fcntl on the DUT and causes a fatal test failure if it
+// doesn't succeed. If more control over the timeout or error handling is
+// needed, use FcntlWithErrno.
+func (dut *DUT) Fcntl(fd, cmd, arg int32) int32 {
+ dut.t.Helper()
+ ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
+ defer cancel()
+ ret, err := dut.FcntlWithErrno(ctx, fd, cmd, arg)
+ if ret == -1 {
+ dut.t.Fatalf("failed to Fcntl: ret=%d, errno=%s", ret, err)
+ }
+ return ret
+}
+
+// FcntlWithErrno calls fcntl on the DUT.
+func (dut *DUT) FcntlWithErrno(ctx context.Context, fd, cmd, arg int32) (int32, error) {
+ dut.t.Helper()
+ req := pb.FcntlRequest{
+ Fd: fd,
+ Cmd: cmd,
+ Arg: arg,
+ }
+ resp, err := dut.posixServer.Fcntl(ctx, &req)
+ if err != nil {
+ dut.t.Fatalf("failed to call Fcntl: %s", err)
+ }
+ return resp.GetRet(), syscall.Errno(resp.GetErrno_())
+}
+
// GetSockName calls getsockname on the DUT and causes a fatal test failure if
// it doesn't succeed. If more control over the timeout or error handling is
// needed, use GetSockNameWithErrno.
@@ -476,6 +507,19 @@ func (dut *DUT) SendToWithErrno(ctx context.Context, sockfd int32, buf []byte, f
return resp.GetRet(), syscall.Errno(resp.GetErrno_())
}
+// SetNonBlocking will set O_NONBLOCK flag for fd if nonblocking
+// is true, otherwise it will clear the flag.
+func (dut *DUT) SetNonBlocking(fd int32, nonblocking bool) {
+ dut.t.Helper()
+ flags := dut.Fcntl(fd, unix.F_GETFL, 0)
+ if nonblocking {
+ flags |= unix.O_NONBLOCK
+ } else {
+ flags &= ^unix.O_NONBLOCK
+ }
+ dut.Fcntl(fd, unix.F_SETFL, flags)
+}
+
func (dut *DUT) setSockOpt(ctx context.Context, sockfd, level, optname int32, optval *pb.SockOptVal) (int32, error) {
dut.t.Helper()
req := pb.SetSockOptRequest{
diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go
index 1b0e5b8fc..560c4111b 100644
--- a/test/packetimpact/testbench/layers.go
+++ b/test/packetimpact/testbench/layers.go
@@ -939,6 +939,11 @@ func (l *Payload) ToBytes() ([]byte, error) {
return l.Bytes, nil
}
+// Length returns payload byte length.
+func (l *Payload) Length() int {
+ return l.length()
+}
+
func (l *Payload) match(other Layer) bool {
return equalLayer(l, other)
}
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index 3a0e9cb07..2a41ef326 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -16,6 +16,19 @@ packetimpact_go_test(
)
packetimpact_go_test(
+ name = "ipv4_id_uniqueness",
+ srcs = ["ipv4_id_uniqueness_test.go"],
+ # TODO(b/157506701) Fix netstack then remove the line below.
+ expect_netstack_failure = True,
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/tcpip/header",
+ "//test/packetimpact/testbench",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+packetimpact_go_test(
name = "udp_recv_multicast",
srcs = ["udp_recv_multicast_test.go"],
# TODO(b/152813495): Fix netstack then remove the line below.
@@ -111,10 +124,8 @@ packetimpact_go_test(
)
packetimpact_go_test(
- name = "tcp_should_piggyback",
- srcs = ["tcp_should_piggyback_test.go"],
- # TODO(b/153680566): Fix netstack then remove the line below.
- expect_netstack_failure = True,
+ name = "tcp_send_window_sizes_piggyback",
+ srcs = ["tcp_send_window_sizes_piggyback_test.go"],
deps = [
"//pkg/tcpip/header",
"//test/packetimpact/testbench",
@@ -157,6 +168,58 @@ packetimpact_go_test(
)
packetimpact_go_test(
+ name = "tcp_queue_receive_in_syn_sent",
+ srcs = ["tcp_queue_receive_in_syn_sent_test.go"],
+ # TODO(b/157658105): Fix netstack then remove the line below.
+ expect_netstack_failure = True,
+ deps = [
+ "//pkg/tcpip/header",
+ "//test/packetimpact/testbench",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+packetimpact_go_test(
+ name = "tcp_synsent_reset",
+ srcs = ["tcp_synsent_reset_test.go"],
+ deps = [
+ "//pkg/tcpip/header",
+ "//test/packetimpact/testbench",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+packetimpact_go_test(
+ name = "tcp_synrcvd_reset",
+ srcs = ["tcp_synrcvd_reset_test.go"],
+ deps = [
+ "//pkg/tcpip/header",
+ "//test/packetimpact/testbench",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+packetimpact_go_test(
+ name = "tcp_splitseg_mss",
+ srcs = ["tcp_splitseg_mss_test.go"],
+ deps = [
+ "//pkg/tcpip/header",
+ "//test/packetimpact/testbench",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+packetimpact_go_test(
+ name = "tcp_cork_mss",
+ srcs = ["tcp_cork_mss_test.go"],
+ deps = [
+ "//pkg/tcpip/header",
+ "//test/packetimpact/testbench",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+packetimpact_go_test(
name = "icmpv6_param_problem",
srcs = ["icmpv6_param_problem_test.go"],
# TODO(b/153485026): Fix netstack then remove the line below.
diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go
index c26ab78d9..407565078 100644
--- a/test/packetimpact/tests/fin_wait2_timeout_test.go
+++ b/test/packetimpact/tests/fin_wait2_timeout_test.go
@@ -21,11 +21,11 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
func TestFinWait2Timeout(t *testing.T) {
@@ -37,13 +37,13 @@ func TestFinWait2Timeout(t *testing.T) {
{"WithoutLinger2", false},
} {
t.Run(tt.description, func(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
defer dut.Close(listenFd)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
defer conn.Close()
- conn.Handshake()
+ conn.Connect()
acceptFd, _ := dut.Accept(listenFd)
if tt.linger2 {
@@ -52,21 +52,21 @@ func TestFinWait2Timeout(t *testing.T) {
}
dut.Close(acceptFd)
- if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
+ if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
t.Fatalf("expected a FIN-ACK within 1 second but got none: %s", err)
}
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
time.Sleep(5 * time.Second)
conn.Drain()
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
if tt.linger2 {
- if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, time.Second); err != nil {
+ if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil {
t.Fatalf("expected a RST packet within a second but got none: %s", err)
}
} else {
- if got, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, 10*time.Second); got != nil || err == nil {
+ if got, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, 10*time.Second); got != nil || err == nil {
t.Fatalf("expected no RST packets within ten seconds but got one: %s", got)
}
}
diff --git a/test/packetimpact/tests/icmpv6_param_problem_test.go b/test/packetimpact/tests/icmpv6_param_problem_test.go
index bb1fc26fc..961059fc1 100644
--- a/test/packetimpact/tests/icmpv6_param_problem_test.go
+++ b/test/packetimpact/tests/icmpv6_param_problem_test.go
@@ -21,27 +21,27 @@ import (
"time"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
// TestICMPv6ParamProblemTest sends a packet with a bad next header. The DUT
// should respond with an ICMPv6 Parameter Problem message.
func TestICMPv6ParamProblemTest(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
- conn := tb.NewIPv6Conn(t, tb.IPv6{}, tb.IPv6{})
+ conn := testbench.NewIPv6Conn(t, testbench.IPv6{}, testbench.IPv6{})
defer conn.Close()
- ipv6 := tb.IPv6{
+ ipv6 := testbench.IPv6{
// 254 is reserved and used for experimentation and testing. This should
// cause an error.
- NextHeader: tb.Uint8(254),
+ NextHeader: testbench.Uint8(254),
}
- icmpv6 := tb.ICMPv6{
- Type: tb.ICMPv6Type(header.ICMPv6EchoRequest),
+ icmpv6 := testbench.ICMPv6{
+ Type: testbench.ICMPv6Type(header.ICMPv6EchoRequest),
NDPPayload: []byte("hello world"),
}
@@ -61,14 +61,14 @@ func TestICMPv6ParamProblemTest(t *testing.T) {
b := make([]byte, 4)
binary.BigEndian.PutUint32(b, header.IPv6NextHeaderOffset)
expectedPayload = append(b, expectedPayload...)
- expectedICMPv6 := tb.ICMPv6{
- Type: tb.ICMPv6Type(header.ICMPv6ParamProblem),
+ expectedICMPv6 := testbench.ICMPv6{
+ Type: testbench.ICMPv6Type(header.ICMPv6ParamProblem),
NDPPayload: expectedPayload,
}
- paramProblem := tb.Layers{
- &tb.Ether{},
- &tb.IPv6{},
+ paramProblem := testbench.Layers{
+ &testbench.Ether{},
+ &testbench.IPv6{},
&expectedICMPv6,
}
timeout := time.Second
diff --git a/test/packetimpact/tests/ipv4_id_uniqueness_test.go b/test/packetimpact/tests/ipv4_id_uniqueness_test.go
new file mode 100644
index 000000000..4efb9829c
--- /dev/null
+++ b/test/packetimpact/tests/ipv4_id_uniqueness_test.go
@@ -0,0 +1,111 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ipv4_id_uniqueness_test
+
+import (
+ "context"
+ "flag"
+ "fmt"
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+ testbench.RegisterFlags(flag.CommandLine)
+}
+
+func recvTCPSegment(conn *testbench.TCPIPv4, expect *testbench.TCP, expectPayload *testbench.Payload) (uint16, error) {
+ layers, err := conn.ExpectData(expect, expectPayload, time.Second)
+ if err != nil {
+ return 0, fmt.Errorf("failed to receive TCP segment: %s", err)
+ }
+ if len(layers) < 2 {
+ return 0, fmt.Errorf("got packet with layers: %v, expected to have at least 2 layers (link and network)", layers)
+ }
+ ipv4, ok := layers[1].(*testbench.IPv4)
+ if !ok {
+ return 0, fmt.Errorf("got network layer: %T, expected: *IPv4", layers[1])
+ }
+ if *ipv4.Flags&header.IPv4FlagDontFragment != 0 {
+ return 0, fmt.Errorf("got IPv4 DF=1, expected DF=0")
+ }
+ return *ipv4.ID, nil
+}
+
+// RFC 6864 section 4.2 states: "The IPv4 ID of non-atomic datagrams MUST NOT
+// be reused when sending a copy of an earlier non-atomic datagram."
+//
+// This test creates a TCP connection, uses the IP_MTU_DISCOVER socket option
+// to force the DF bit to be 0, and checks that a retransmitted segment has a
+// different IPv4 Identification value than the original segment.
+func TestIPv4RetransmitIdentificationUniqueness(t *testing.T) {
+ dut := testbench.NewDUT(t)
+ defer dut.TearDown()
+
+ listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+ defer dut.Close(listenFD)
+
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+ defer conn.Close()
+
+ conn.Connect()
+ remoteFD, _ := dut.Accept(listenFD)
+ defer dut.Close(remoteFD)
+
+ dut.SetSockOptInt(remoteFD, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
+
+ // TODO(b/129291778) The following socket option clears the DF bit on
+ // IP packets sent over the socket, and is currently not supported by
+ // gVisor. gVisor by default sends packets with DF=0 anyway, so the
+ // socket option being not supported does not affect the operation of
+ // this test. Once the socket option is supported, the following call
+ // can be changed to simply assert success.
+ ret, errno := dut.SetSockOptIntWithErrno(context.Background(), remoteFD, unix.IPPROTO_IP, linux.IP_MTU_DISCOVER, linux.IP_PMTUDISC_DONT)
+ if ret == -1 && errno != unix.ENOTSUP {
+ t.Fatalf("failed to set IP_MTU_DISCOVER socket option to IP_PMTUDISC_DONT: %s", errno)
+ }
+
+ sampleData := []byte("Sample Data")
+ samplePayload := &testbench.Payload{Bytes: sampleData}
+
+ dut.Send(remoteFD, sampleData, 0)
+ if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
+ t.Fatalf("failed to receive TCP segment sent for RTT calculation: %s", err)
+ }
+ // Let the DUT estimate RTO with RTT from the DATA-ACK.
+ // TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which
+ // we can skip sending this ACK.
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+
+ expectTCP := &testbench.TCP{SeqNum: testbench.Uint32(uint32(*conn.RemoteSeqNum()))}
+ dut.Send(remoteFD, sampleData, 0)
+ originalID, err := recvTCPSegment(&conn, expectTCP, samplePayload)
+ if err != nil {
+ t.Fatalf("failed to receive TCP segment: %s", err)
+ }
+
+ retransmitID, err := recvTCPSegment(&conn, expectTCP, samplePayload)
+ if err != nil {
+ t.Fatalf("failed to receive retransmitted TCP segment: %s", err)
+ }
+ if originalID == retransmitID {
+ t.Fatalf("unexpectedly got retransmitted TCP segment with same IPv4 ID field=%d", originalID)
+ }
+}
diff --git a/test/packetimpact/tests/tcp_close_wait_ack_test.go b/test/packetimpact/tests/tcp_close_wait_ack_test.go
index 70a22a2db..6e7ff41d7 100644
--- a/test/packetimpact/tests/tcp_close_wait_ack_test.go
+++ b/test/packetimpact/tests/tcp_close_wait_ack_test.go
@@ -23,17 +23,17 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/seqnum"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
func TestCloseWaitAck(t *testing.T) {
for _, tt := range []struct {
description string
- makeTestingTCP func(conn *tb.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) tb.TCP
+ makeTestingTCP func(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP
seqNumOffset seqnum.Size
expectAck bool
}{
@@ -45,27 +45,27 @@ func TestCloseWaitAck(t *testing.T) {
{"ACK", GenerateUnaccACKSegment, 2, true},
} {
t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
defer dut.Close(listenFd)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
defer conn.Close()
- conn.Handshake()
+ conn.Connect()
acceptFd, _ := dut.Accept(listenFd)
// Send a FIN to DUT to intiate the active close
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagFin)})
- gotTCP, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second)
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)})
+ gotTCP, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
if err != nil {
t.Fatalf("expected an ACK for our fin and DUT should enter CLOSE_WAIT: %s", err)
}
windowSize := seqnum.Size(*gotTCP.WindowSize)
// Send a segment with OTW Seq / unacc ACK and expect an ACK back
- conn.Send(tt.makeTestingTCP(&conn, tt.seqNumOffset, windowSize), &tb.Payload{Bytes: []byte("Sample Data")})
- gotAck, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second)
+ conn.Send(tt.makeTestingTCP(&conn, tt.seqNumOffset, windowSize), &testbench.Payload{Bytes: []byte("Sample Data")})
+ gotAck, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
if tt.expectAck && err != nil {
t.Fatalf("expected an ack but got none: %s", err)
}
@@ -75,14 +75,14 @@ func TestCloseWaitAck(t *testing.T) {
// Now let's verify DUT is indeed in CLOSE_WAIT
dut.Close(acceptFd)
- if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil {
+ if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil {
t.Fatalf("expected DUT to send a FIN: %s", err)
}
// Ack the FIN from DUT
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
// Send some extra data to DUT
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, &tb.Payload{Bytes: []byte("Sample Data")})
- if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, time.Second); err != nil {
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: []byte("Sample Data")})
+ if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil {
t.Fatalf("expected DUT to send an RST: %s", err)
}
})
@@ -92,17 +92,17 @@ func TestCloseWaitAck(t *testing.T) {
// This generates an segment with seqnum = RCV.NXT + RCV.WND + seqNumOffset, the
// generated segment is only acceptable when seqNumOffset is 0, otherwise an ACK
// is expected from the receiver.
-func GenerateOTWSeqSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) tb.TCP {
+func GenerateOTWSeqSegment(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP {
lastAcceptable := conn.LocalSeqNum().Add(windowSize)
otwSeq := uint32(lastAcceptable.Add(seqNumOffset))
- return tb.TCP{SeqNum: tb.Uint32(otwSeq), Flags: tb.Uint8(header.TCPFlagAck)}
+ return testbench.TCP{SeqNum: testbench.Uint32(otwSeq), Flags: testbench.Uint8(header.TCPFlagAck)}
}
// This generates an segment with acknum = SND.NXT + seqNumOffset, the generated
// segment is only acceptable when seqNumOffset is 0, otherwise an ACK is
// expected from the receiver.
-func GenerateUnaccACKSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) tb.TCP {
+func GenerateUnaccACKSegment(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP {
lastAcceptable := conn.RemoteSeqNum()
unaccAck := uint32(lastAcceptable.Add(seqNumOffset))
- return tb.TCP{AckNum: tb.Uint32(unaccAck), Flags: tb.Uint8(header.TCPFlagAck)}
+ return testbench.TCP{AckNum: testbench.Uint32(unaccAck), Flags: testbench.Uint8(header.TCPFlagAck)}
}
diff --git a/test/packetimpact/tests/tcp_cork_mss_test.go b/test/packetimpact/tests/tcp_cork_mss_test.go
new file mode 100644
index 000000000..fb8f48629
--- /dev/null
+++ b/test/packetimpact/tests/tcp_cork_mss_test.go
@@ -0,0 +1,84 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_cork_mss_test
+
+import (
+ "flag"
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+ testbench.RegisterFlags(flag.CommandLine)
+}
+
+// TestTCPCorkMSS tests for segment coalesce and split as per MSS.
+func TestTCPCorkMSS(t *testing.T) {
+ dut := testbench.NewDUT(t)
+ defer dut.TearDown()
+ listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+ defer dut.Close(listenFD)
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+ defer conn.Close()
+
+ const mss = uint32(header.TCPDefaultMSS)
+ options := make([]byte, header.TCPOptionMSSLength)
+ header.EncodeMSSOption(mss, options)
+ conn.ConnectWithOptions(options)
+
+ acceptFD, _ := dut.Accept(listenFD)
+ defer dut.Close(acceptFD)
+
+ dut.SetSockOptInt(acceptFD, unix.IPPROTO_TCP, unix.TCP_CORK, 1)
+
+ // Let the dut application send 2 small segments to be held up and coalesced
+ // until the application sends a larger segment to fill up to > MSS.
+ sampleData := []byte("Sample Data")
+ dut.Send(acceptFD, sampleData, 0)
+ dut.Send(acceptFD, sampleData, 0)
+
+ expectedData := sampleData
+ expectedData = append(expectedData, sampleData...)
+ largeData := make([]byte, mss+1)
+ expectedData = append(expectedData, largeData...)
+ dut.Send(acceptFD, largeData, 0)
+
+ // Expect the segments to be coalesced and sent and capped to MSS.
+ expectedPayload := testbench.Payload{Bytes: expectedData[:mss]}
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &expectedPayload, time.Second); err != nil {
+ t.Fatalf("expected payload was not received: %s", err)
+ }
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+ // Expect the coalesced segment to be split and transmitted.
+ expectedPayload = testbench.Payload{Bytes: expectedData[mss:]}
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil {
+ t.Fatalf("expected payload was not received: %s", err)
+ }
+
+ // Check for segments to *not* be held up because of TCP_CORK when
+ // the current send window is less than MSS.
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(2 * len(sampleData)))})
+ dut.Send(acceptFD, sampleData, 0)
+ dut.Send(acceptFD, sampleData, 0)
+ expectedPayload = testbench.Payload{Bytes: append(sampleData, sampleData...)}
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil {
+ t.Fatalf("expected payload was not received: %s", err)
+ }
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+}
diff --git a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go
index 2c1ec27d3..b9b3e91d3 100644
--- a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go
+++ b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go
@@ -21,22 +21,22 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
func TestTcpNoAcceptCloseReset(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
- conn.Handshake()
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+ conn.Connect()
defer conn.Close()
dut.Close(listenFd)
- if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, 1*time.Second); err != nil {
+ if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, 1*time.Second); err != nil {
t.Fatalf("expected a RST-ACK packet but got none: %s", err)
}
}
diff --git a/test/packetimpact/tests/tcp_outside_the_window_test.go b/test/packetimpact/tests/tcp_outside_the_window_test.go
index 351df193e..ad8c74234 100644
--- a/test/packetimpact/tests/tcp_outside_the_window_test.go
+++ b/test/packetimpact/tests/tcp_outside_the_window_test.go
@@ -23,11 +23,11 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/seqnum"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
// TestTCPOutsideTheWindows tests the behavior of the DUT when packets arrive
@@ -38,7 +38,7 @@ func TestTCPOutsideTheWindow(t *testing.T) {
for _, tt := range []struct {
description string
tcpFlags uint8
- payload []tb.Layer
+ payload []testbench.Layer
seqNumOffset seqnum.Size
expectACK bool
}{
@@ -46,28 +46,28 @@ func TestTCPOutsideTheWindow(t *testing.T) {
{"SYNACK", header.TCPFlagSyn | header.TCPFlagAck, nil, 0, true},
{"ACK", header.TCPFlagAck, nil, 0, false},
{"FIN", header.TCPFlagFin, nil, 0, false},
- {"Data", header.TCPFlagAck, []tb.Layer{&tb.Payload{Bytes: []byte("abc123")}}, 0, true},
+ {"Data", header.TCPFlagAck, []testbench.Layer{&testbench.Payload{Bytes: []byte("abc123")}}, 0, true},
{"SYN", header.TCPFlagSyn, nil, 1, true},
{"SYNACK", header.TCPFlagSyn | header.TCPFlagAck, nil, 1, true},
{"ACK", header.TCPFlagAck, nil, 1, true},
{"FIN", header.TCPFlagFin, nil, 1, false},
- {"Data", header.TCPFlagAck, []tb.Layer{&tb.Payload{Bytes: []byte("abc123")}}, 1, true},
+ {"Data", header.TCPFlagAck, []testbench.Layer{&testbench.Payload{Bytes: []byte("abc123")}}, 1, true},
{"SYN", header.TCPFlagSyn, nil, 2, true},
{"SYNACK", header.TCPFlagSyn | header.TCPFlagAck, nil, 2, true},
{"ACK", header.TCPFlagAck, nil, 2, true},
{"FIN", header.TCPFlagFin, nil, 2, false},
- {"Data", header.TCPFlagAck, []tb.Layer{&tb.Payload{Bytes: []byte("abc123")}}, 2, true},
+ {"Data", header.TCPFlagAck, []testbench.Layer{&testbench.Payload{Bytes: []byte("abc123")}}, 2, true},
} {
t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
defer dut.Close(listenFD)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
defer conn.Close()
- conn.Handshake()
+ conn.Connect()
acceptFD, _ := dut.Accept(listenFD)
defer dut.Close(acceptFD)
@@ -75,13 +75,13 @@ func TestTCPOutsideTheWindow(t *testing.T) {
conn.Drain()
// Ignore whatever incrementing that this out-of-order packet might cause
// to the AckNum.
- localSeqNum := tb.Uint32(uint32(*conn.LocalSeqNum()))
- conn.Send(tb.TCP{
- Flags: tb.Uint8(tt.tcpFlags),
- SeqNum: tb.Uint32(uint32(conn.LocalSeqNum().Add(windowSize))),
+ localSeqNum := testbench.Uint32(uint32(*conn.LocalSeqNum()))
+ conn.Send(testbench.TCP{
+ Flags: testbench.Uint8(tt.tcpFlags),
+ SeqNum: testbench.Uint32(uint32(conn.LocalSeqNum().Add(windowSize))),
}, tt.payload...)
timeout := 3 * time.Second
- gotACK, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: localSeqNum}, timeout)
+ gotACK, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: localSeqNum}, timeout)
if tt.expectACK && err != nil {
t.Fatalf("expected an ACK packet within %s but got none: %s", timeout, err)
}
diff --git a/test/packetimpact/tests/tcp_paws_mechanism_test.go b/test/packetimpact/tests/tcp_paws_mechanism_test.go
index 0a668adcf..55db4ece6 100644
--- a/test/packetimpact/tests/tcp_paws_mechanism_test.go
+++ b/test/packetimpact/tests/tcp_paws_mechanism_test.go
@@ -22,25 +22,25 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
func TestPAWSMechanism(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
defer dut.Close(listenFD)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
defer conn.Close()
options := make([]byte, header.TCPOptionTSLength)
header.EncodeTSOption(currentTS(), 0, options)
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn), Options: options})
- synAck, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), Options: options})
+ synAck, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
if err != nil {
t.Fatalf("didn't get synack during handshake: %s", err)
}
@@ -50,7 +50,7 @@ func TestPAWSMechanism(t *testing.T) {
}
tsecr := parsedSynOpts.TSVal
header.EncodeTSOption(currentTS(), tsecr, options)
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), Options: options})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options})
acceptFD, _ := dut.Accept(listenFD)
defer dut.Close(acceptFD)
@@ -61,9 +61,9 @@ func TestPAWSMechanism(t *testing.T) {
// every time we send one, it should not cause any flakiness because timestamps
// only need to be non-decreasing.
time.Sleep(3 * time.Millisecond)
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), Options: options}, &tb.Payload{Bytes: sampleData})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData})
- gotTCP, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second)
+ gotTCP, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
if err != nil {
t.Fatalf("expected an ACK but got none: %s", err)
}
@@ -86,9 +86,9 @@ func TestPAWSMechanism(t *testing.T) {
// 3ms here is chosen arbitrarily and this time.Sleep() should not cause flakiness
// due to the exact same reasoning discussed above.
time.Sleep(3 * time.Millisecond)
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), Options: options}, &tb.Payload{Bytes: sampleData})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData})
- gotTCP, err = conn.Expect(tb.TCP{AckNum: lastAckNum, Flags: tb.Uint8(header.TCPFlagAck)}, time.Second)
+ gotTCP, err = conn.Expect(testbench.TCP{AckNum: lastAckNum, Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
if err != nil {
t.Fatalf("expected segment with AckNum %d but got none: %s", lastAckNum, err)
}
diff --git a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go
new file mode 100644
index 000000000..b640d8673
--- /dev/null
+++ b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go
@@ -0,0 +1,87 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_queue_receive_in_syn_sent_test
+
+import (
+ "bytes"
+ "context"
+ "encoding/hex"
+ "errors"
+ "flag"
+ "net"
+ "sync"
+ "syscall"
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+ testbench.RegisterFlags(flag.CommandLine)
+}
+
+func TestQueueReceiveInSynSent(t *testing.T) {
+ dut := testbench.NewDUT(t)
+ defer dut.TearDown()
+
+ socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4))
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+ defer conn.Close()
+
+ sampleData := []byte("Sample Data")
+
+ dut.SetNonBlocking(socket, true)
+ if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) {
+ t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err)
+ }
+ if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil {
+ t.Fatalf("expected a SYN from DUT, but got none: %s", err)
+ }
+
+ // Issue RECEIVE call in SYN-SENT, this should be queued for process until the connection
+ // is established.
+ dut.SetNonBlocking(socket, false)
+ var wg sync.WaitGroup
+ defer wg.Wait()
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ ctx, cancel := context.WithTimeout(context.Background(), time.Second*3)
+ defer cancel()
+ n, buff, err := dut.RecvWithErrno(ctx, socket, int32(len(sampleData)), 0)
+ if n == -1 {
+ t.Fatalf("failed to recv on DUT: %s", err)
+ }
+ if got := buff[:n]; !bytes.Equal(got, sampleData) {
+ t.Fatalf("received data don't match, got:\n%s, want:\n%s", hex.Dump(got), hex.Dump(sampleData))
+ }
+ }()
+
+ // The following sleep is used to prevent the connection from being established while the
+ // RPC is in flight.
+ time.Sleep(time.Second)
+
+ // Bring the connection to Established.
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)})
+ if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil {
+ t.Fatalf("expected an ACK from DUT, but got none: %s", err)
+ }
+
+ // Send sample data to DUT.
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData})
+}
diff --git a/test/packetimpact/tests/tcp_retransmits_test.go b/test/packetimpact/tests/tcp_retransmits_test.go
index c043ad881..e51409b66 100644
--- a/test/packetimpact/tests/tcp_retransmits_test.go
+++ b/test/packetimpact/tests/tcp_retransmits_test.go
@@ -21,53 +21,53 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
// TestRetransmits tests retransmits occur at exponentially increasing
// time intervals.
func TestRetransmits(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
defer dut.Close(listenFd)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
defer conn.Close()
- conn.Handshake()
+ conn.Connect()
acceptFd, _ := dut.Accept(listenFd)
defer dut.Close(acceptFd)
dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
sampleData := []byte("Sample Data")
- samplePayload := &tb.Payload{Bytes: sampleData}
+ samplePayload := &testbench.Payload{Bytes: sampleData}
dut.Send(acceptFd, sampleData, 0)
- if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
}
// Give a chance for the dut to estimate RTO with RTT from the DATA-ACK.
// TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which
// we can skip sending this ACK.
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
startRTO := time.Second
current := startRTO
first := time.Now()
dut.Send(acceptFd, sampleData, 0)
- seq := tb.Uint32(uint32(*conn.RemoteSeqNum()))
- if _, err := conn.ExpectData(&tb.TCP{SeqNum: seq}, samplePayload, startRTO); err != nil {
+ seq := testbench.Uint32(uint32(*conn.RemoteSeqNum()))
+ if _, err := conn.ExpectData(&testbench.TCP{SeqNum: seq}, samplePayload, startRTO); err != nil {
t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
}
// Expect retransmits of the same segment.
for i := 0; i < 5; i++ {
start := time.Now()
- if _, err := conn.ExpectData(&tb.TCP{SeqNum: seq}, samplePayload, 2*current); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{SeqNum: seq}, samplePayload, 2*current); err != nil {
t.Fatalf("expected a packet with payload %v: %s loop %d", samplePayload, err, i)
}
if i == 0 {
diff --git a/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go b/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go
new file mode 100644
index 000000000..90ab85419
--- /dev/null
+++ b/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go
@@ -0,0 +1,105 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_send_window_sizes_piggyback_test
+
+import (
+ "flag"
+ "fmt"
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+ testbench.RegisterFlags(flag.CommandLine)
+}
+
+// TestSendWindowSizesPiggyback tests cases where segment sizes are close to
+// sender window size and checks for ACK piggybacking for each of those case.
+func TestSendWindowSizesPiggyback(t *testing.T) {
+ sampleData := []byte("Sample Data")
+ segmentSize := uint16(len(sampleData))
+ // Advertise receive window sizes that are lesser, equal to or greater than
+ // enqueued segment size and check for segment transmits. The test attempts
+ // to enqueue a segment on the dut before acknowledging previous segment and
+ // lets the dut piggyback any ACKs along with the enqueued segment.
+ for _, tt := range []struct {
+ description string
+ windowSize uint16
+ expectedPayload1 []byte
+ expectedPayload2 []byte
+ enqueue bool
+ }{
+ // Expect the first segment to be split as it cannot be accomodated in
+ // the sender window. This means we need not enqueue a new segment after
+ // the first segment.
+ {"WindowSmallerThanSegment", segmentSize - 1, sampleData[:(segmentSize - 1)], sampleData[(segmentSize - 1):], false /* enqueue */},
+
+ {"WindowEqualToSegment", segmentSize, sampleData, sampleData, true /* enqueue */},
+
+ // Expect the second segment to not be split as its size is greater than
+ // the available sender window size. The segments should not be split
+ // when there is pending unacknowledged data and the segment-size is
+ // greater than available sender window.
+ {"WindowGreaterThanSegment", segmentSize + 1, sampleData, sampleData, true /* enqueue */},
+ } {
+ t.Run(fmt.Sprintf("%s%d", tt.description, tt.windowSize), func(t *testing.T) {
+ dut := testbench.NewDUT(t)
+ defer dut.TearDown()
+ listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+ defer dut.Close(listenFd)
+
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort, WindowSize: testbench.Uint16(tt.windowSize)}, testbench.TCP{SrcPort: &remotePort})
+ defer conn.Close()
+
+ conn.Connect()
+ acceptFd, _ := dut.Accept(listenFd)
+ defer dut.Close(acceptFd)
+
+ dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
+
+ expectedTCP := testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}
+
+ dut.Send(acceptFd, sampleData, 0)
+ expectedPayload := testbench.Payload{Bytes: tt.expectedPayload1}
+ if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil {
+ t.Fatalf("expected payload was not received: %s", err)
+ }
+
+ // Expect any enqueued segment to be transmitted by the dut along with
+ // piggybacked ACK for our data.
+
+ if tt.enqueue {
+ // Enqueue a segment for the dut to transmit.
+ dut.Send(acceptFd, sampleData, 0)
+ }
+
+ // Send ACK for the previous segment along with data for the dut to
+ // receive and ACK back. Sending this ACK would make room for the dut
+ // to transmit any enqueued segment.
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh), WindowSize: testbench.Uint16(tt.windowSize)}, &testbench.Payload{Bytes: sampleData})
+
+ // Expect the dut to piggyback the ACK for received data along with
+ // the segment enqueued for transmit.
+ expectedPayload = testbench.Payload{Bytes: tt.expectedPayload2}
+ if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil {
+ t.Fatalf("expected payload was not received: %s", err)
+ }
+ })
+ }
+}
diff --git a/test/packetimpact/tests/tcp_should_piggyback_test.go b/test/packetimpact/tests/tcp_should_piggyback_test.go
deleted file mode 100644
index 0240dc2f9..000000000
--- a/test/packetimpact/tests/tcp_should_piggyback_test.go
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tcp_should_piggyback_test
-
-import (
- "flag"
- "testing"
- "time"
-
- "golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
-)
-
-func init() {
- tb.RegisterFlags(flag.CommandLine)
-}
-
-func TestPiggyback(t *testing.T) {
- dut := tb.NewDUT(t)
- defer dut.TearDown()
- listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
- defer dut.Close(listenFd)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort, WindowSize: tb.Uint16(12)}, tb.TCP{SrcPort: &remotePort})
- defer conn.Close()
-
- conn.Handshake()
- acceptFd, _ := dut.Accept(listenFd)
- defer dut.Close(acceptFd)
-
- dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
-
- sampleData := []byte("Sample Data")
-
- dut.Send(acceptFd, sampleData, 0)
- expectedTCP := tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}
- expectedPayload := tb.Payload{Bytes: sampleData}
- if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil {
- t.Fatalf("Expected %v but didn't get one: %s", tb.Layers{&expectedTCP, &expectedPayload}, err)
- }
-
- // Cause DUT to send us more data as soon as we ACK their first data segment because we have
- // a small window.
- dut.Send(acceptFd, sampleData, 0)
-
- // DUT should ACK our segment by piggybacking ACK to their outstanding data segment instead of
- // sending a separate ACK packet.
- conn.Send(expectedTCP, &expectedPayload)
- if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil {
- t.Fatalf("Expected %v but didn't get one: %s", tb.Layers{&expectedTCP, &expectedPayload}, err)
- }
-}
diff --git a/test/packetimpact/tests/tcp_splitseg_mss_test.go b/test/packetimpact/tests/tcp_splitseg_mss_test.go
new file mode 100644
index 000000000..9350d0988
--- /dev/null
+++ b/test/packetimpact/tests/tcp_splitseg_mss_test.go
@@ -0,0 +1,71 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_splitseg_mss_test
+
+import (
+ "flag"
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+ testbench.RegisterFlags(flag.CommandLine)
+}
+
+// TestTCPSplitSegMSS lets the dut try to send segments larger than MSS.
+// It tests if the transmitted segments are capped at MSS and are split.
+func TestTCPSplitSegMSS(t *testing.T) {
+ dut := testbench.NewDUT(t)
+ defer dut.TearDown()
+ listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+ defer dut.Close(listenFD)
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+ defer conn.Close()
+
+ const mss = uint32(header.TCPDefaultMSS)
+ options := make([]byte, header.TCPOptionMSSLength)
+ header.EncodeMSSOption(mss, options)
+ conn.ConnectWithOptions(options)
+
+ acceptFD, _ := dut.Accept(listenFD)
+ defer dut.Close(acceptFD)
+
+ // Let the dut send a segment larger than MSS.
+ largeData := make([]byte, mss+1)
+ for i := 0; i < 2; i++ {
+ dut.Send(acceptFD, largeData, 0)
+ if i == 0 {
+ // On Linux, the initial segment goes out beyond MSS and the segment
+ // split occurs on retransmission. Call ExpectData to wait to
+ // receive the split segment.
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil {
+ t.Fatalf("expected payload was not received: %s", err)
+ }
+ } else {
+ if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil {
+ t.Fatalf("expected payload was not received: %s", err)
+ }
+ }
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+ if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: largeData[mss:]}, time.Second); err != nil {
+ t.Fatalf("expected payload was not received: %s", err)
+ }
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+ }
+}
diff --git a/test/packetimpact/tests/tcp_synrcvd_reset_test.go b/test/packetimpact/tests/tcp_synrcvd_reset_test.go
new file mode 100644
index 000000000..7d5deab01
--- /dev/null
+++ b/test/packetimpact/tests/tcp_synrcvd_reset_test.go
@@ -0,0 +1,52 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_syn_reset_test
+
+import (
+ "flag"
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+ testbench.RegisterFlags(flag.CommandLine)
+}
+
+// TestTCPSynRcvdReset tests transition from SYN-RCVD to CLOSED.
+func TestTCPSynRcvdReset(t *testing.T) {
+ dut := testbench.NewDUT(t)
+ defer dut.TearDown()
+ listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+ defer dut.Close(listenFD)
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+ defer conn.Close()
+
+ // Expect dut connection to have transitioned to SYN-RCVD state.
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)})
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
+ t.Fatalf("expected SYN-ACK %s", err)
+ }
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)})
+ // Expect the connection to have transitioned SYN-RCVD to CLOSED.
+ // TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side.
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
+ t.Fatalf("expected a TCP RST %s", err)
+ }
+}
diff --git a/test/packetimpact/tests/tcp_synsent_reset_test.go b/test/packetimpact/tests/tcp_synsent_reset_test.go
new file mode 100644
index 000000000..6898a2239
--- /dev/null
+++ b/test/packetimpact/tests/tcp_synsent_reset_test.go
@@ -0,0 +1,88 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_synsent_reset_test
+
+import (
+ "flag"
+ "net"
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+ tb.RegisterFlags(flag.CommandLine)
+}
+
+// dutSynSentState sets up the dut connection in SYN-SENT state.
+func dutSynSentState(t *testing.T) (*tb.DUT, *tb.TCPIPv4, uint16, uint16) {
+ dut := tb.NewDUT(t)
+
+ clientFD, clientPort := dut.CreateBoundSocket(unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(tb.RemoteIPv4))
+ port := uint16(9001)
+ conn := tb.NewTCPIPv4(t, tb.TCP{SrcPort: &port, DstPort: &clientPort}, tb.TCP{SrcPort: &clientPort, DstPort: &port})
+
+ sa := unix.SockaddrInet4{Port: int(port)}
+ copy(sa.Addr[:], net.IP(net.ParseIP(tb.LocalIPv4)).To4())
+ // Bring the dut to SYN-SENT state with a non-blocking connect.
+ dut.Connect(clientFD, &sa)
+ if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}, nil, time.Second); err != nil {
+ t.Fatalf("expected SYN\n")
+ }
+
+ return &dut, &conn, port, clientPort
+}
+
+// TestTCPSynSentReset tests RFC793, p67: SYN-SENT to CLOSED transition.
+func TestTCPSynSentReset(t *testing.T) {
+ dut, conn, _, _ := dutSynSentState(t)
+ defer conn.Close()
+ defer dut.TearDown()
+ conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst | header.TCPFlagAck)})
+ // Expect the connection to have closed.
+ // TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side.
+ conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+ if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
+ t.Fatalf("expected a TCP RST")
+ }
+}
+
+// TestTCPSynSentRcvdReset tests RFC793, p70, SYN-SENT to SYN-RCVD to CLOSED
+// transitions.
+func TestTCPSynSentRcvdReset(t *testing.T) {
+ dut, c, remotePort, clientPort := dutSynSentState(t)
+ defer dut.TearDown()
+ defer c.Close()
+
+ conn := tb.NewTCPIPv4(t, tb.TCP{SrcPort: &remotePort, DstPort: &clientPort}, tb.TCP{SrcPort: &clientPort, DstPort: &remotePort})
+ defer conn.Close()
+ // Initiate new SYN connection with the same port pair
+ // (simultaneous open case), expect the dut connection to move to
+ // SYN-RCVD state
+ conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)})
+ if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
+ t.Fatalf("expected SYN-ACK %s\n", err)
+ }
+ conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)})
+ // Expect the connection to have transitioned SYN-RCVD to CLOSED.
+ // TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side.
+ conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+ if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
+ t.Fatalf("expected a TCP RST")
+ }
+}
diff --git a/test/packetimpact/tests/tcp_user_timeout_test.go b/test/packetimpact/tests/tcp_user_timeout_test.go
index ce31917ee..87e45d765 100644
--- a/test/packetimpact/tests/tcp_user_timeout_test.go
+++ b/test/packetimpact/tests/tcp_user_timeout_test.go
@@ -22,27 +22,27 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
-func sendPayload(conn *tb.TCPIPv4, dut *tb.DUT, fd int32) error {
+func sendPayload(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error {
sampleData := make([]byte, 100)
for i := range sampleData {
sampleData[i] = uint8(i)
}
conn.Drain()
dut.Send(fd, sampleData, 0)
- if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &tb.Payload{Bytes: sampleData}, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: sampleData}, time.Second); err != nil {
return fmt.Errorf("expected data but got none: %w", err)
}
return nil
}
-func sendFIN(conn *tb.TCPIPv4, dut *tb.DUT, fd int32) error {
+func sendFIN(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error {
dut.Close(fd)
return nil
}
@@ -59,20 +59,20 @@ func TestTCPUserTimeout(t *testing.T) {
} {
for _, ttf := range []struct {
description string
- f func(conn *tb.TCPIPv4, dut *tb.DUT, fd int32) error
+ f func(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error
}{
{"AfterPayload", sendPayload},
{"AfterFIN", sendFIN},
} {
t.Run(tt.description+ttf.description, func(t *testing.T) {
// Create a socket, listen, TCP handshake, and accept.
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
defer dut.Close(listenFD)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
defer conn.Close()
- conn.Handshake()
+ conn.Connect()
acceptFD, _ := dut.Accept(listenFD)
if tt.userTimeout != 0 {
@@ -85,14 +85,14 @@ func TestTCPUserTimeout(t *testing.T) {
time.Sleep(tt.sendDelay)
conn.Drain()
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
// If TCP_USER_TIMEOUT was set and the above delay was longer than the
// TCP_USER_TIMEOUT then the DUT should send a RST in response to the
// testbench's packet.
expectRST := tt.userTimeout != 0 && tt.sendDelay > tt.userTimeout
expectTimeout := 5 * time.Second
- got, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, expectTimeout)
+ got, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, expectTimeout)
if expectRST && err != nil {
t.Errorf("expected RST packet within %s but got none: %s", expectTimeout, err)
}
diff --git a/test/packetimpact/tests/tcp_window_shrink_test.go b/test/packetimpact/tests/tcp_window_shrink_test.go
index 58ec1d740..576ec1a8b 100644
--- a/test/packetimpact/tests/tcp_window_shrink_test.go
+++ b/test/packetimpact/tests/tcp_window_shrink_test.go
@@ -21,53 +21,53 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
func TestWindowShrink(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
defer dut.Close(listenFd)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
defer conn.Close()
- conn.Handshake()
+ conn.Connect()
acceptFd, _ := dut.Accept(listenFd)
defer dut.Close(acceptFd)
dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
sampleData := []byte("Sample Data")
- samplePayload := &tb.Payload{Bytes: sampleData}
+ samplePayload := &testbench.Payload{Bytes: sampleData}
dut.Send(acceptFd, sampleData, 0)
- if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
}
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
dut.Send(acceptFd, sampleData, 0)
dut.Send(acceptFd, sampleData, 0)
- if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
}
- if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
}
// We close our receiving window here
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
dut.Send(acceptFd, []byte("Sample Data"), 0)
// Note: There is another kind of zero-window probing which Windows uses (by sending one
// new byte at `RemoteSeqNum`), if netstack wants to go that way, we may want to change
// the following lines.
expectedRemoteSeqNum := *conn.RemoteSeqNum() - 1
- if _, err := conn.ExpectData(&tb.TCP{SeqNum: tb.Uint32(uint32(expectedRemoteSeqNum))}, nil, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{SeqNum: testbench.Uint32(uint32(expectedRemoteSeqNum))}, nil, time.Second); err != nil {
t.Fatalf("expected a packet with sequence number %v: %s", expectedRemoteSeqNum, err)
}
}
diff --git a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go
index dd43a24db..54cee138f 100644
--- a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go
+++ b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go
@@ -21,39 +21,39 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
// TestZeroWindowProbeRetransmit tests retransmits of zero window probes
// to be sent at exponentially inreasing time intervals.
func TestZeroWindowProbeRetransmit(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
defer dut.Close(listenFd)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
defer conn.Close()
- conn.Handshake()
+ conn.Connect()
acceptFd, _ := dut.Accept(listenFd)
defer dut.Close(acceptFd)
dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
sampleData := []byte("Sample Data")
- samplePayload := &tb.Payload{Bytes: sampleData}
+ samplePayload := &testbench.Payload{Bytes: sampleData}
// Send and receive sample data to the dut.
dut.Send(acceptFd, sampleData, 0)
- if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
}
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
- if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
t.Fatalf("expected a packet with sequence number %s", err)
}
@@ -63,9 +63,9 @@ func TestZeroWindowProbeRetransmit(t *testing.T) {
// of the recorded first zero probe transmission duration.
//
// Advertize zero receive window again.
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)})
- probeSeq := tb.Uint32(uint32(*conn.RemoteSeqNum() - 1))
- ackProbe := tb.Uint32(uint32(*conn.RemoteSeqNum()))
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
+ probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1))
+ ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum()))
startProbeDuration := time.Second
current := startProbeDuration
@@ -79,7 +79,7 @@ func TestZeroWindowProbeRetransmit(t *testing.T) {
// Expect zero-window probe with a timeout which is a function of the typical
// first retransmission time. The retransmission times is supposed to
// exponentially increase.
- if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, 2*current); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, 2*current); err != nil {
t.Fatalf("expected a probe with sequence number %v: loop %d", probeSeq, i)
}
if i == 0 {
@@ -92,13 +92,14 @@ func TestZeroWindowProbeRetransmit(t *testing.T) {
t.Fatalf("zero probe came sooner interval %d probe %d\n", p, i)
}
// Acknowledge the zero-window probes from the dut.
- conn.Send(tb.TCP{AckNum: ackProbe, Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)})
+ conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
current *= 2
}
// Advertize non-zero window.
- conn.Send(tb.TCP{AckNum: ackProbe, Flags: tb.Uint8(header.TCPFlagAck)})
+ conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)})
// Expect the dut to recover and transmit data.
- if _, err := conn.ExpectData(&tb.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.
+ TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil {
t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
}
}
diff --git a/test/packetimpact/tests/tcp_zero_window_probe_test.go b/test/packetimpact/tests/tcp_zero_window_probe_test.go
index 6c453505d..c9b3b7af2 100644
--- a/test/packetimpact/tests/tcp_zero_window_probe_test.go
+++ b/test/packetimpact/tests/tcp_zero_window_probe_test.go
@@ -21,41 +21,41 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
// TestZeroWindowProbe tests few cases of zero window probing over the
// same connection.
func TestZeroWindowProbe(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
defer dut.Close(listenFd)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
defer conn.Close()
- conn.Handshake()
+ conn.Connect()
acceptFd, _ := dut.Accept(listenFd)
defer dut.Close(acceptFd)
dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
sampleData := []byte("Sample Data")
- samplePayload := &tb.Payload{Bytes: sampleData}
+ samplePayload := &testbench.Payload{Bytes: sampleData}
start := time.Now()
// Send and receive sample data to the dut.
dut.Send(acceptFd, sampleData, 0)
- if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
}
sendTime := time.Now().Sub(start)
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
- if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
t.Fatalf("expected a packet with sequence number %s", err)
}
@@ -63,16 +63,16 @@ func TestZeroWindowProbe(t *testing.T) {
// probe to be sent.
//
// Advertize zero window to the dut.
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
// Expected sequence number of the zero window probe.
- probeSeq := tb.Uint32(uint32(*conn.RemoteSeqNum() - 1))
+ probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1))
// Expected ack number of the ACK for the probe.
- ackProbe := tb.Uint32(uint32(*conn.RemoteSeqNum()))
+ ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum()))
// Expect there are no zero-window probes sent until there is data to be sent out
// from the dut.
- if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, 2*time.Second); err == nil {
+ if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, 2*time.Second); err == nil {
t.Fatalf("unexpected a packet with sequence number %v: %s", probeSeq, err)
}
@@ -80,7 +80,7 @@ func TestZeroWindowProbe(t *testing.T) {
// Ask the dut to send out data.
dut.Send(acceptFd, sampleData, 0)
// Expect zero-window probe from the dut.
- if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil {
t.Fatalf("expected a packet with sequence number %v: %s", probeSeq, err)
}
// Expect the probe to be sent after some time. Compare against the previous
@@ -94,9 +94,9 @@ func TestZeroWindowProbe(t *testing.T) {
// and sends out the sample payload after the send window opens.
//
// Advertize non-zero window to the dut and ack the zero window probe.
- conn.Send(tb.TCP{AckNum: ackProbe, Flags: tb.Uint8(header.TCPFlagAck)})
+ conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)})
// Expect the dut to recover and transmit data.
- if _, err := conn.ExpectData(&tb.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil {
t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
}
@@ -104,9 +104,9 @@ func TestZeroWindowProbe(t *testing.T) {
// Check if the dut responds as we do for a similar probe sent to it.
// Basically with sequence number to one byte behind the unacknowledged
// sequence number.
- p := tb.Uint32(uint32(*conn.LocalSeqNum()))
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), SeqNum: tb.Uint32(uint32(*conn.LocalSeqNum() - 1))})
- if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: p}, nil, time.Second); err != nil {
+ p := testbench.Uint32(uint32(*conn.LocalSeqNum()))
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), SeqNum: testbench.Uint32(uint32(*conn.LocalSeqNum() - 1))})
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: p}, nil, time.Second); err != nil {
t.Fatalf("expected a packet with ack number: %d: %s", p, err)
}
}
diff --git a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go
index 193427fb9..749281d9d 100644
--- a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go
+++ b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go
@@ -21,39 +21,39 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
// TestZeroWindowProbeUserTimeout sanity tests user timeout when we are
// retransmitting zero window probes.
func TestZeroWindowProbeUserTimeout(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
defer dut.Close(listenFd)
- conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
defer conn.Close()
- conn.Handshake()
+ conn.Connect()
acceptFd, _ := dut.Accept(listenFd)
defer dut.Close(acceptFd)
dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
sampleData := []byte("Sample Data")
- samplePayload := &tb.Payload{Bytes: sampleData}
+ samplePayload := &testbench.Payload{Bytes: sampleData}
// Send and receive sample data to the dut.
dut.Send(acceptFd, sampleData, 0)
- if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
}
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
- if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
t.Fatalf("expected a packet with sequence number %s", err)
}
@@ -61,15 +61,15 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) {
// probe to be sent.
//
// Advertize zero window to the dut.
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
// Expected sequence number of the zero window probe.
- probeSeq := tb.Uint32(uint32(*conn.RemoteSeqNum() - 1))
+ probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1))
start := time.Now()
// Ask the dut to send out data.
dut.Send(acceptFd, sampleData, 0)
// Expect zero-window probe from the dut.
- if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil {
+ if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil {
t.Fatalf("expected a packet with sequence number %v: %s", probeSeq, err)
}
// Record the duration for first probe, the dut sends the zero window probe after
@@ -82,7 +82,7 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) {
// Reduce the retransmit timeout.
dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, int32(startProbeDuration.Milliseconds()))
// Advertize zero window again.
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)})
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
// Ask the dut to send out data that would trigger zero window probe retransmissions.
dut.Send(acceptFd, sampleData, 0)
@@ -91,8 +91,8 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) {
// Expect the connection to have timed out and closed which would cause the dut
// to reply with a RST to the ACK we send.
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
- if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
t.Fatalf("expected a TCP RST")
}
}
diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go
index ca4df2ab0..aedabf9de 100644
--- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go
+++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go
@@ -26,11 +26,11 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip/header"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
type connectionMode bool
@@ -59,12 +59,12 @@ func (e icmpError) String() string {
return "Unknown ICMP error"
}
-func (e icmpError) ToICMPv4() *tb.ICMPv4 {
+func (e icmpError) ToICMPv4() *testbench.ICMPv4 {
switch e {
case portUnreachable:
- return &tb.ICMPv4{Type: tb.ICMPv4Type(header.ICMPv4DstUnreachable), Code: tb.Uint8(header.ICMPv4PortUnreachable)}
+ return &testbench.ICMPv4{Type: testbench.ICMPv4Type(header.ICMPv4DstUnreachable), Code: testbench.Uint8(header.ICMPv4PortUnreachable)}
case timeToLiveExceeded:
- return &tb.ICMPv4{Type: tb.ICMPv4Type(header.ICMPv4TimeExceeded), Code: tb.Uint8(header.ICMPv4TTLExceeded)}
+ return &testbench.ICMPv4{Type: testbench.ICMPv4Type(header.ICMPv4TimeExceeded), Code: testbench.Uint8(header.ICMPv4TTLExceeded)}
}
return nil
}
@@ -76,8 +76,8 @@ type errorDetection struct {
}
type testData struct {
- dut *tb.DUT
- conn *tb.UDPIPv4
+ dut *testbench.DUT
+ conn *testbench.UDPIPv4
remoteFD int32
remotePort uint16
cleanFD int32
@@ -95,9 +95,9 @@ func wantErrno(c connectionMode, icmpErr icmpError) syscall.Errno {
}
// sendICMPError sends an ICMP error message in response to a UDP datagram.
-func sendICMPError(conn *tb.UDPIPv4, icmpErr icmpError, udp *tb.UDP) error {
+func sendICMPError(conn *testbench.UDPIPv4, icmpErr icmpError, udp *testbench.UDP) error {
if icmpErr == timeToLiveExceeded {
- ip, ok := udp.Prev().(*tb.IPv4)
+ ip, ok := udp.Prev().(*testbench.IPv4)
if !ok {
return fmt.Errorf("expected %s to be IPv4", udp.Prev())
}
@@ -123,10 +123,10 @@ func sendICMPError(conn *tb.UDPIPv4, icmpErr icmpError, udp *tb.UDP) error {
// first recv should succeed immediately.
func testRecv(ctx context.Context, d testData) error {
// Check that receiving on the clean socket works.
- d.conn.Send(tb.UDP{DstPort: &d.cleanPort})
+ d.conn.Send(testbench.UDP{DstPort: &d.cleanPort})
d.dut.Recv(d.cleanFD, 100, 0)
- d.conn.Send(tb.UDP{})
+ d.conn.Send(testbench.UDP{})
if d.wantErrno != syscall.Errno(0) {
ctx, cancel := context.WithTimeout(ctx, time.Second)
@@ -151,7 +151,7 @@ func testRecv(ctx context.Context, d testData) error {
func testSendTo(ctx context.Context, d testData) error {
// Check that sending on the clean socket works.
d.dut.SendTo(d.cleanFD, nil, 0, d.conn.LocalAddr())
- if _, err := d.conn.Expect(tb.UDP{SrcPort: &d.cleanPort}, time.Second); err != nil {
+ if _, err := d.conn.Expect(testbench.UDP{SrcPort: &d.cleanPort}, time.Second); err != nil {
return fmt.Errorf("did not receive UDP packet from clean socket on DUT: %s", err)
}
@@ -169,7 +169,7 @@ func testSendTo(ctx context.Context, d testData) error {
}
d.dut.SendTo(d.remoteFD, nil, 0, d.conn.LocalAddr())
- if _, err := d.conn.Expect(tb.UDP{}, time.Second); err != nil {
+ if _, err := d.conn.Expect(testbench.UDP{}, time.Second); err != nil {
return fmt.Errorf("did not receive UDP packet as expected: %s", err)
}
return nil
@@ -187,7 +187,7 @@ func testSockOpt(_ context.Context, d testData) error {
// Check that after clearing socket error, sending doesn't fail.
d.dut.SendTo(d.remoteFD, nil, 0, d.conn.LocalAddr())
- if _, err := d.conn.Expect(tb.UDP{}, time.Second); err != nil {
+ if _, err := d.conn.Expect(testbench.UDP{}, time.Second); err != nil {
return fmt.Errorf("did not receive UDP packet as expected: %s", err)
}
return nil
@@ -223,7 +223,7 @@ func TestUDPICMPErrorPropagation(t *testing.T) {
errorDetection{"SockOpt", false, testSockOpt},
} {
t.Run(fmt.Sprintf("%s/%s/%s", connect, icmpErr, errDetect.name), func(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
@@ -234,7 +234,7 @@ func TestUDPICMPErrorPropagation(t *testing.T) {
cleanFD, cleanPort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
defer dut.Close(cleanFD)
- conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort})
+ conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
defer conn.Close()
if connect {
@@ -243,7 +243,7 @@ func TestUDPICMPErrorPropagation(t *testing.T) {
}
dut.SendTo(remoteFD, nil, 0, conn.LocalAddr())
- udp, err := conn.Expect(tb.UDP{}, time.Second)
+ udp, err := conn.Expect(testbench.UDP{}, time.Second)
if err != nil {
t.Fatalf("did not receive message from DUT: %s", err)
}
@@ -258,7 +258,7 @@ func TestUDPICMPErrorPropagation(t *testing.T) {
// involved in the generation of the ICMP error. As such,
// interactions between it and the the DUT should be independent of
// the ICMP error at least at the port level.
- connClean := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort})
+ connClean := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
defer connClean.Close()
errDetectConn = &connClean
@@ -281,7 +281,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) {
wantErrno := wantErrno(connect, icmpErr)
t.Run(fmt.Sprintf("%s/%s", connect, icmpErr), func(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
@@ -292,7 +292,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) {
cleanFD, cleanPort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
defer dut.Close(cleanFD)
- conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort})
+ conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
defer conn.Close()
if connect {
@@ -301,7 +301,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) {
}
dut.SendTo(remoteFD, nil, 0, conn.LocalAddr())
- udp, err := conn.Expect(tb.UDP{}, time.Second)
+ udp, err := conn.Expect(testbench.UDP{}, time.Second)
if err != nil {
t.Fatalf("did not receive message from DUT: %s", err)
}
@@ -355,8 +355,8 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) {
t.Fatal(err)
}
- conn.Send(tb.UDP{DstPort: &cleanPort})
- conn.Send(tb.UDP{})
+ conn.Send(testbench.UDP{DstPort: &cleanPort})
+ conn.Send(testbench.UDP{})
wg.Wait()
})
}
diff --git a/test/packetimpact/tests/udp_recv_multicast_test.go b/test/packetimpact/tests/udp_recv_multicast_test.go
index 0bae18ba3..d51a34145 100644
--- a/test/packetimpact/tests/udp_recv_multicast_test.go
+++ b/test/packetimpact/tests/udp_recv_multicast_test.go
@@ -21,22 +21,22 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/tcpip"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
func TestUDPRecvMulticast(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
defer dut.Close(boundFD)
- conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort})
+ conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
defer conn.Close()
- frame := conn.CreateFrame(&tb.UDP{}, &tb.Payload{Bytes: []byte("hello world")})
- frame[1].(*tb.IPv4).DstAddr = tb.Address(tcpip.Address(net.ParseIP("224.0.0.1").To4()))
+ frame := conn.CreateFrame(&testbench.UDP{}, &testbench.Payload{Bytes: []byte("hello world")})
+ frame[1].(*testbench.IPv4).DstAddr = testbench.Address(tcpip.Address(net.ParseIP("224.0.0.1").To4()))
conn.SendFrame(frame)
dut.Recv(boundFD, 100, 0)
}
diff --git a/test/packetimpact/tests/udp_send_recv_dgram_test.go b/test/packetimpact/tests/udp_send_recv_dgram_test.go
index 350875a6f..bf64803e2 100644
--- a/test/packetimpact/tests/udp_send_recv_dgram_test.go
+++ b/test/packetimpact/tests/udp_send_recv_dgram_test.go
@@ -22,11 +22,11 @@ import (
"time"
"golang.org/x/sys/unix"
- tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
)
func init() {
- tb.RegisterFlags(flag.CommandLine)
+ testbench.RegisterFlags(flag.CommandLine)
}
func generateRandomPayload(t *testing.T, n int) string {
@@ -39,11 +39,11 @@ func generateRandomPayload(t *testing.T, n int) string {
}
func TestUDPRecv(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
defer dut.Close(boundFD)
- conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort})
+ conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
defer conn.Close()
testCases := []struct {
@@ -59,7 +59,7 @@ func TestUDPRecv(t *testing.T) {
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- frame := conn.CreateFrame(&tb.UDP{}, &tb.Payload{Bytes: []byte(tc.payload)})
+ frame := conn.CreateFrame(&testbench.UDP{}, &testbench.Payload{Bytes: []byte(tc.payload)})
conn.SendFrame(frame)
if got, want := string(dut.Recv(boundFD, int32(len(tc.payload)), 0)), tc.payload; got != want {
t.Fatalf("received payload does not match sent payload got: %s, want: %s", got, want)
@@ -69,11 +69,11 @@ func TestUDPRecv(t *testing.T) {
}
func TestUDPSend(t *testing.T) {
- dut := tb.NewDUT(t)
+ dut := testbench.NewDUT(t)
defer dut.TearDown()
boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
defer dut.Close(boundFD)
- conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort})
+ conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
defer conn.Close()
testCases := []struct {
@@ -93,7 +93,7 @@ func TestUDPSend(t *testing.T) {
if got, want := int(dut.SendTo(boundFD, []byte(tc.payload), 0, conn.LocalAddr())), len(tc.payload); got != want {
t.Fatalf("short write got: %d, want: %d", got, want)
}
- if _, err := conn.ExpectData(tb.UDP{SrcPort: &remotePort}, tb.Payload{Bytes: []byte(tc.payload)}, 1*time.Second); err != nil {
+ if _, err := conn.ExpectData(testbench.UDP{SrcPort: &remotePort}, testbench.Payload{Bytes: []byte(tc.payload)}, 1*time.Second); err != nil {
t.Fatal(err)
}
})
diff --git a/test/root/BUILD b/test/root/BUILD
index 639e293e3..a9e91ccd6 100644
--- a/test/root/BUILD
+++ b/test/root/BUILD
@@ -33,6 +33,7 @@ go_test(
],
visibility = ["//:sandbox"],
deps = [
+ "//pkg/cleanup",
"//pkg/test/criutil",
"//pkg/test/dockerutil",
"//pkg/test/testutil",
diff --git a/test/root/crictl_test.go b/test/root/crictl_test.go
index 85007dcce..c138e02dc 100644
--- a/test/root/crictl_test.go
+++ b/test/root/crictl_test.go
@@ -30,10 +30,10 @@ import (
"testing"
"time"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/test/criutil"
"gvisor.dev/gvisor/pkg/test/dockerutil"
"gvisor.dev/gvisor/pkg/test/testutil"
- "gvisor.dev/gvisor/runsc/specutils"
)
// Tests for crictl have to be run as root (rather than in a user namespace)
@@ -272,27 +272,20 @@ disabled_plugins = ["restart"]
// * Runs containerd and waits for it to reach a "ready" state for testing.
// * Returns a cleanup function that should be called at the end of the test.
func setup(t *testing.T) (*criutil.Crictl, func(), error) {
- var cleanups []func()
- cleanupFunc := func() {
- for i := len(cleanups) - 1; i >= 0; i-- {
- cleanups[i]()
- }
- }
- cleanup := specutils.MakeCleanup(cleanupFunc)
- defer cleanup.Clean()
-
// Create temporary containerd root and state directories, and a socket
// via which crictl and containerd communicate.
containerdRoot, err := ioutil.TempDir(testutil.TmpDir(), "containerd-root")
if err != nil {
t.Fatalf("failed to create containerd root: %v", err)
}
- cleanups = append(cleanups, func() { os.RemoveAll(containerdRoot) })
+ cu := cleanup.Make(func() { os.RemoveAll(containerdRoot) })
+ defer cu.Clean()
+
containerdState, err := ioutil.TempDir(testutil.TmpDir(), "containerd-state")
if err != nil {
t.Fatalf("failed to create containerd state: %v", err)
}
- cleanups = append(cleanups, func() { os.RemoveAll(containerdState) })
+ cu.Add(func() { os.RemoveAll(containerdState) })
sockAddr := filepath.Join(testutil.TmpDir(), "containerd-test.sock")
// We rewrite a configuration. This is based on the current docker
@@ -305,7 +298,7 @@ func setup(t *testing.T) (*criutil.Crictl, func(), error) {
if err != nil {
t.Fatalf("failed to write containerd config")
}
- cleanups = append(cleanups, configCleanup)
+ cu.Add(configCleanup)
// Start containerd.
cmd := exec.Command(getContainerd(),
@@ -321,7 +314,8 @@ func setup(t *testing.T) (*criutil.Crictl, func(), error) {
stdout := &bytes.Buffer{}
cmd.Stderr = io.MultiWriter(startupW, stderr)
cmd.Stdout = io.MultiWriter(startupW, stdout)
- cleanups = append(cleanups, func() {
+ cu.Add(func() {
+ // Log output in case of failure.
t.Logf("containerd stdout: %s", stdout.String())
t.Logf("containerd stderr: %s", stderr.String())
})
@@ -338,15 +332,14 @@ func setup(t *testing.T) (*criutil.Crictl, func(), error) {
// Kill must be the last cleanup (as it will be executed first).
cc := criutil.NewCrictl(t, sockAddr)
- cleanups = append(cleanups, func() {
+ cu.Add(func() {
cc.CleanUp() // Remove tmp files, etc.
if err := testutil.KillCommand(cmd); err != nil {
log.Printf("error killing containerd: %v", err)
}
})
- cleanup.Release()
- return cc, cleanupFunc, nil
+ return cc, cu.Release(), nil
}
// httpGet GETs the contents of a file served from a pod on port 80.
diff --git a/test/root/oom_score_adj_test.go b/test/root/oom_score_adj_test.go
index 9a3cecd97..4243eb59e 100644
--- a/test/root/oom_score_adj_test.go
+++ b/test/root/oom_score_adj_test.go
@@ -20,6 +20,7 @@ import (
"testing"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/test/testutil"
"gvisor.dev/gvisor/runsc/container"
"gvisor.dev/gvisor/runsc/specutils"
@@ -324,40 +325,26 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) {
}
func startContainers(t *testing.T, specs []*specs.Spec, ids []string) ([]*container.Container, func(), error) {
- var (
- containers []*container.Container
- cleanups []func()
- )
- cleanups = append(cleanups, func() {
- for _, c := range containers {
- c.Destroy()
- }
- })
- cleanupAll := func() {
- for _, c := range cleanups {
- c()
- }
- }
- localClean := specutils.MakeCleanup(cleanupAll)
- defer localClean.Clean()
+ var containers []*container.Container
// All containers must share the same root.
- rootDir, cleanup, err := testutil.SetupRootDir()
+ rootDir, clean, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- cleanups = append(cleanups, cleanup)
+ cu := cleanup.Make(clean)
+ defer cu.Clean()
// Point this to from the configuration.
conf := testutil.TestConfig(t)
conf.RootDir = rootDir
for i, spec := range specs {
- bundleDir, cleanup, err := testutil.SetupBundleDir(spec)
+ bundleDir, clean, err := testutil.SetupBundleDir(spec)
if err != nil {
return nil, nil, fmt.Errorf("error setting up bundle: %v", err)
}
- cleanups = append(cleanups, cleanup)
+ cu.Add(clean)
args := container.Args{
ID: ids[i],
@@ -375,6 +362,5 @@ func startContainers(t *testing.T, specs []*specs.Spec, ids []string) ([]*contai
}
}
- localClean.Release()
- return containers, cleanupAll, nil
+ return containers, cu.Release(), nil
}
diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl
index 0a75b158f..402ba4064 100644
--- a/test/runner/defs.bzl
+++ b/test/runner/defs.bzl
@@ -60,7 +60,8 @@ def _syscall_test(
network = "none",
file_access = "exclusive",
overlay = False,
- add_uds_tree = False):
+ add_uds_tree = False,
+ vfs2 = False):
# Prepend "runsc" to non-native platform names.
full_platform = platform if platform == "native" else "runsc_" + platform
@@ -70,6 +71,8 @@ def _syscall_test(
name += "_shared"
if overlay:
name += "_overlay"
+ if vfs2:
+ name += "_vfs2"
if network != "none":
name += "_" + network + "net"
@@ -102,6 +105,7 @@ def _syscall_test(
"--file-access=" + file_access,
"--overlay=" + str(overlay),
"--add-uds-tree=" + str(add_uds_tree),
+ "--vfs2=" + str(vfs2),
]
# Call the rule above.
@@ -123,6 +127,7 @@ def syscall_test(
add_overlay = False,
add_uds_tree = False,
add_hostinet = False,
+ vfs2 = False,
tags = None):
"""syscall_test is a macro that will create targets for all platforms.
@@ -160,6 +165,29 @@ def syscall_test(
tags = platform_tags + tags,
)
+ vfs2_tags = list(tags)
+ if vfs2:
+ # Add tag to easily run VFS2 tests with --test_tag_filters=vfs2
+ vfs2_tags.append("vfs2")
+
+ else:
+ # Don't automatically run tests tests not yet passing.
+ vfs2_tags.append("manual")
+ vfs2_tags.append("noguitar")
+ vfs2_tags.append("notap")
+
+ _syscall_test(
+ test = test,
+ shard_count = shard_count,
+ size = size,
+ platform = default_platform,
+ use_tmpfs = use_tmpfs,
+ add_uds_tree = add_uds_tree,
+ tags = platforms[default_platform] + vfs2_tags,
+ vfs2 = True,
+ )
+
+ # TODO(gvisor.dev/issue/1487): Enable VFS2 overlay tests.
if add_overlay:
_syscall_test(
test = test,
@@ -172,6 +200,18 @@ def syscall_test(
overlay = True,
)
+ if add_hostinet:
+ _syscall_test(
+ test = test,
+ shard_count = shard_count,
+ size = size,
+ platform = default_platform,
+ use_tmpfs = use_tmpfs,
+ network = "host",
+ add_uds_tree = add_uds_tree,
+ tags = platforms[default_platform] + tags,
+ )
+
if not use_tmpfs:
# Also test shared gofer access.
_syscall_test(
@@ -184,15 +224,14 @@ def syscall_test(
tags = platforms[default_platform] + tags,
file_access = "shared",
)
-
- if add_hostinet:
_syscall_test(
test = test,
shard_count = shard_count,
size = size,
platform = default_platform,
use_tmpfs = use_tmpfs,
- network = "host",
add_uds_tree = add_uds_tree,
- tags = platforms[default_platform] + tags,
+ tags = platforms[default_platform] + vfs2_tags,
+ file_access = "shared",
+ vfs2 = True,
)
diff --git a/test/runner/runner.go b/test/runner/runner.go
index e4f04cd2a..948e3a8ef 100644
--- a/test/runner/runner.go
+++ b/test/runner/runner.go
@@ -46,6 +46,7 @@ var (
useTmpfs = flag.Bool("use-tmpfs", false, "mounts tmpfs for /tmp")
fileAccess = flag.String("file-access", "exclusive", "mounts root in exclusive or shared mode")
overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable tmpfs overlay")
+ vfs2 = flag.Bool("vfs2", false, "enable VFS2")
parallel = flag.Bool("parallel", false, "run tests in parallel")
runscPath = flag.String("runsc", "", "path to runsc binary")
@@ -146,6 +147,9 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error {
if *overlay {
args = append(args, "-overlay")
}
+ if *vfs2 {
+ args = append(args, "-vfs2")
+ }
if *debug {
args = append(args, "-debug", "-log-packets=true")
}
@@ -204,7 +208,7 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error {
return
}
log.Warningf("%s: Got signal: %v", name, s)
- done := make(chan bool)
+ done := make(chan bool, 1)
dArgs := append([]string{}, args...)
dArgs = append(dArgs, "-alsologtostderr=true", "debug", "--stacks", id)
go func(dArgs []string) {
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 9800a0cdf..3406a2de8 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -2,22 +2,33 @@ load("//test/runner:defs.bzl", "syscall_test")
package(licenses = ["notice"])
-syscall_test(test = "//test/syscalls/linux:32bit_test")
+syscall_test(
+ test = "//test/syscalls/linux:32bit_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:accept_bind_stream_test")
+syscall_test(
+ test = "//test/syscalls/linux:accept_bind_stream_test",
+ vfs2 = "True",
+)
syscall_test(
size = "large",
shard_count = 50,
test = "//test/syscalls/linux:accept_bind_test",
+ vfs2 = "True",
)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:access_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:affinity_test")
+syscall_test(
+ test = "//test/syscalls/linux:affinity_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
@@ -28,11 +39,18 @@ syscall_test(
size = "medium",
shard_count = 5,
test = "//test/syscalls/linux:alarm_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:arch_prctl_test")
+syscall_test(
+ test = "//test/syscalls/linux:arch_prctl_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:bad_test")
+syscall_test(
+ test = "//test/syscalls/linux:bad_test",
+ vfs2 = "True",
+)
syscall_test(
size = "large",
@@ -40,9 +58,15 @@ syscall_test(
test = "//test/syscalls/linux:bind_test",
)
-syscall_test(test = "//test/syscalls/linux:brk_test")
+syscall_test(
+ test = "//test/syscalls/linux:brk_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:socket_test")
+syscall_test(
+ test = "//test/syscalls/linux:socket_test",
+ vfs2 = "True",
+)
syscall_test(
size = "large",
@@ -51,16 +75,19 @@ syscall_test(
# involve much concurrency, TSAN's usefulness here is limited anyway.
tags = ["nogotsan"],
test = "//test/syscalls/linux:socket_stress_test",
+ vfs2 = "True",
)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:chdir_test",
+ vfs2 = "True",
)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:chmod_test",
+ vfs2 = "True",
)
syscall_test(
@@ -68,6 +95,7 @@ syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:chown_test",
use_tmpfs = True, # chwon tests require gofer to be running as root.
+ vfs2 = "True",
)
syscall_test(
@@ -75,45 +103,70 @@ syscall_test(
test = "//test/syscalls/linux:chroot_test",
)
-syscall_test(test = "//test/syscalls/linux:clock_getres_test")
+syscall_test(
+ test = "//test/syscalls/linux:clock_getres_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:clock_gettime_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:clock_nanosleep_test")
+syscall_test(
+ test = "//test/syscalls/linux:clock_nanosleep_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:concurrency_test")
+syscall_test(
+ test = "//test/syscalls/linux:concurrency_test",
+ vfs2 = "True",
+)
syscall_test(
add_uds_tree = True,
test = "//test/syscalls/linux:connect_external_test",
use_tmpfs = True,
+ vfs2 = "True",
)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:creat_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:dev_test")
+syscall_test(
+ test = "//test/syscalls/linux:dev_test",
+)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:dup_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:epoll_test")
+syscall_test(
+ test = "//test/syscalls/linux:epoll_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:eventfd_test")
+syscall_test(
+ test = "//test/syscalls/linux:eventfd_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:exceptions_test")
+syscall_test(
+ test = "//test/syscalls/linux:exceptions_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
add_overlay = True,
test = "//test/syscalls/linux:exec_test",
+ vfs2 = "True",
)
syscall_test(
@@ -122,7 +175,10 @@ syscall_test(
test = "//test/syscalls/linux:exec_binary_test",
)
-syscall_test(test = "//test/syscalls/linux:exit_test")
+syscall_test(
+ test = "//test/syscalls/linux:exit_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
@@ -134,11 +190,15 @@ syscall_test(
test = "//test/syscalls/linux:fallocate_test",
)
-syscall_test(test = "//test/syscalls/linux:fault_test")
+syscall_test(
+ test = "//test/syscalls/linux:fault_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:fchdir_test",
+ vfs2 = "True",
)
syscall_test(
@@ -152,11 +212,20 @@ syscall_test(
test = "//test/syscalls/linux:flock_test",
)
-syscall_test(test = "//test/syscalls/linux:fork_test")
+syscall_test(
+ test = "//test/syscalls/linux:fork_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:fpsig_fork_test")
+syscall_test(
+ test = "//test/syscalls/linux:fpsig_fork_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:fpsig_nested_test")
+syscall_test(
+ test = "//test/syscalls/linux:fpsig_nested_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
@@ -167,20 +236,33 @@ syscall_test(
size = "medium",
shard_count = 5,
test = "//test/syscalls/linux:futex_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:getcpu_host_test")
+syscall_test(
+ test = "//test/syscalls/linux:getcpu_host_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:getcpu_test")
+syscall_test(
+ test = "//test/syscalls/linux:getcpu_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:getdents_test",
)
-syscall_test(test = "//test/syscalls/linux:getrandom_test")
+syscall_test(
+ test = "//test/syscalls/linux:getrandom_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:getrusage_test")
+syscall_test(
+ test = "//test/syscalls/linux:getrusage_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
@@ -196,15 +278,20 @@ syscall_test(
syscall_test(
test = "//test/syscalls/linux:iptables_test",
+ vfs2 = "True",
)
syscall_test(
size = "large",
shard_count = 5,
test = "//test/syscalls/linux:itimer_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:kill_test")
+syscall_test(
+ test = "//test/syscalls/linux:kill_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
@@ -215,19 +302,33 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:lseek_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:madvise_test")
+syscall_test(
+ test = "//test/syscalls/linux:madvise_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:memory_accounting_test")
+syscall_test(
+ test = "//test/syscalls/linux:memory_accounting_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:mempolicy_test")
+syscall_test(
+ test = "//test/syscalls/linux:mempolicy_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:mincore_test")
+syscall_test(
+ test = "//test/syscalls/linux:mincore_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:mkdir_test",
+ vfs2 = "True",
)
syscall_test(
@@ -249,20 +350,29 @@ syscall_test(
syscall_test(
size = "medium",
test = "//test/syscalls/linux:mremap_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:msync_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:munmap_test")
+syscall_test(
+ test = "//test/syscalls/linux:munmap_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:network_namespace_test")
+syscall_test(
+ test = "//test/syscalls/linux:network_namespace_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:open_create_test",
+ vfs2 = "True",
)
syscall_test(
@@ -270,40 +380,65 @@ syscall_test(
test = "//test/syscalls/linux:open_test",
)
-syscall_test(test = "//test/syscalls/linux:packet_socket_raw_test")
+syscall_test(
+ test = "//test/syscalls/linux:packet_socket_raw_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:packet_socket_test")
+syscall_test(
+ test = "//test/syscalls/linux:packet_socket_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:partial_bad_buffer_test")
+syscall_test(
+ test = "//test/syscalls/linux:partial_bad_buffer_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:pause_test")
+syscall_test(
+ test = "//test/syscalls/linux:pause_test",
+ vfs2 = "True",
+)
syscall_test(
size = "large",
add_overlay = True,
shard_count = 5,
test = "//test/syscalls/linux:pipe_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:poll_test")
+syscall_test(
+ test = "//test/syscalls/linux:poll_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:ppoll_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:prctl_setuid_test")
+syscall_test(
+ test = "//test/syscalls/linux:prctl_setuid_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:prctl_test")
+syscall_test(
+ test = "//test/syscalls/linux:prctl_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:pread64_test",
+ vfs2 = "True",
)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:preadv_test",
+ vfs2 = "True",
)
syscall_test(
@@ -311,36 +446,56 @@ syscall_test(
test = "//test/syscalls/linux:preadv2_test",
)
-syscall_test(test = "//test/syscalls/linux:priority_test")
+syscall_test(
+ test = "//test/syscalls/linux:priority_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:proc_test",
)
-syscall_test(test = "//test/syscalls/linux:proc_net_test")
+syscall_test(
+ test = "//test/syscalls/linux:proc_net_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:proc_pid_oomscore_test")
+syscall_test(
+ test = "//test/syscalls/linux:proc_pid_oomscore_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:proc_pid_smaps_test")
+syscall_test(
+ test = "//test/syscalls/linux:proc_pid_smaps_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:proc_pid_uid_gid_map_test")
+syscall_test(
+ test = "//test/syscalls/linux:proc_pid_uid_gid_map_test",
+)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:pselect_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:ptrace_test")
+syscall_test(
+ test = "//test/syscalls/linux:ptrace_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
shard_count = 5,
test = "//test/syscalls/linux:pty_test",
+ vfs2 = "True",
)
syscall_test(
test = "//test/syscalls/linux:pty_root_test",
+ vfs2 = "True",
)
syscall_test(
@@ -351,17 +506,28 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:pwrite64_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:raw_socket_hdrincl_test")
+syscall_test(
+ test = "//test/syscalls/linux:raw_socket_hdrincl_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:raw_socket_icmp_test")
+syscall_test(
+ test = "//test/syscalls/linux:raw_socket_icmp_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:raw_socket_ipv4_test")
+syscall_test(
+ test = "//test/syscalls/linux:raw_socket_ipv4_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:read_test",
+ vfs2 = "True",
)
syscall_test(
@@ -373,12 +539,14 @@ syscall_test(
size = "medium",
shard_count = 5,
test = "//test/syscalls/linux:readv_socket_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
add_overlay = True,
test = "//test/syscalls/linux:readv_test",
+ vfs2 = "True",
)
syscall_test(
@@ -387,25 +555,50 @@ syscall_test(
test = "//test/syscalls/linux:rename_test",
)
-syscall_test(test = "//test/syscalls/linux:rlimits_test")
+syscall_test(
+ test = "//test/syscalls/linux:rlimits_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:rseq_test")
+syscall_test(
+ test = "//test/syscalls/linux:rseq_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:rtsignal_test")
+syscall_test(
+ test = "//test/syscalls/linux:rtsignal_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:signalfd_test")
+syscall_test(
+ test = "//test/syscalls/linux:signalfd_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:sched_test")
+syscall_test(
+ test = "//test/syscalls/linux:sched_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:sched_yield_test")
+syscall_test(
+ test = "//test/syscalls/linux:sched_yield_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:seccomp_test")
+syscall_test(
+ test = "//test/syscalls/linux:seccomp_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:select_test")
+syscall_test(
+ test = "//test/syscalls/linux:select_test",
+ vfs2 = "True",
+)
syscall_test(
shard_count = 20,
test = "//test/syscalls/linux:semaphore_test",
+ vfs2 = "True",
)
syscall_test(
@@ -421,49 +614,68 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:splice_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:sigaction_test")
+syscall_test(
+ test = "//test/syscalls/linux:sigaction_test",
+ vfs2 = "True",
+)
# TODO(b/119826902): Enable once the test passes in runsc.
-# syscall_test(test = "//test/syscalls/linux:sigaltstack_test")
+# syscall_test(vfs2="True",test = "//test/syscalls/linux:sigaltstack_test")
-syscall_test(test = "//test/syscalls/linux:sigiret_test")
+syscall_test(
+ test = "//test/syscalls/linux:sigiret_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:sigprocmask_test")
+syscall_test(
+ test = "//test/syscalls/linux:sigprocmask_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:sigstop_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:sigtimedwait_test")
+syscall_test(
+ test = "//test/syscalls/linux:sigtimedwait_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:shm_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_abstract_non_blocking_test",
+ vfs2 = "True",
)
syscall_test(
size = "large",
shard_count = 50,
test = "//test/syscalls/linux:socket_abstract_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_domain_non_blocking_test",
+ vfs2 = "True",
)
syscall_test(
size = "large",
shard_count = 50,
test = "//test/syscalls/linux:socket_domain_test",
+ vfs2 = "True",
)
syscall_test(
@@ -489,58 +701,90 @@ syscall_test(
size = "large",
shard_count = 50,
test = "//test/syscalls/linux:socket_ip_tcp_generic_loopback_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_ip_tcp_loopback_non_blocking_test",
+ vfs2 = "True",
)
syscall_test(
size = "large",
shard_count = 50,
test = "//test/syscalls/linux:socket_ip_tcp_loopback_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
shard_count = 50,
test = "//test/syscalls/linux:socket_ip_tcp_udp_generic_loopback_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_ip_udp_loopback_non_blocking_test",
+ vfs2 = "True",
)
syscall_test(
size = "large",
shard_count = 50,
test = "//test/syscalls/linux:socket_ip_udp_loopback_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_ipv4_udp_unbound_loopback_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:socket_ip_unbound_test")
+syscall_test(
+ test = "//test/syscalls/linux:socket_ip_unbound_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:socket_netdevice_test")
+syscall_test(
+ test = "//test/syscalls/linux:socket_netdevice_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:socket_netlink_test")
+syscall_test(
+ test = "//test/syscalls/linux:socket_netlink_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:socket_netlink_route_test")
+syscall_test(
+ test = "//test/syscalls/linux:socket_netlink_route_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:socket_netlink_uevent_test")
+syscall_test(
+ test = "//test/syscalls/linux:socket_netlink_uevent_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:socket_blocking_local_test")
+syscall_test(
+ test = "//test/syscalls/linux:socket_blocking_local_test",
+)
-syscall_test(test = "//test/syscalls/linux:socket_blocking_ip_test")
+syscall_test(
+ test = "//test/syscalls/linux:socket_blocking_ip_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:socket_non_stream_blocking_local_test")
+syscall_test(
+ test = "//test/syscalls/linux:socket_non_stream_blocking_local_test",
+)
-syscall_test(test = "//test/syscalls/linux:socket_non_stream_blocking_udp_test")
+syscall_test(
+ test = "//test/syscalls/linux:socket_non_stream_blocking_udp_test",
+ vfs2 = "True",
+)
syscall_test(
size = "large",
@@ -550,6 +794,7 @@ syscall_test(
syscall_test(
size = "large",
test = "//test/syscalls/linux:socket_stream_blocking_tcp_test",
+ vfs2 = "True",
)
syscall_test(
@@ -572,6 +817,7 @@ syscall_test(
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_unix_dgram_non_blocking_test",
+ vfs2 = "True",
)
syscall_test(
@@ -579,6 +825,7 @@ syscall_test(
add_overlay = True,
shard_count = 50,
test = "//test/syscalls/linux:socket_unix_pair_test",
+ vfs2 = "True",
)
syscall_test(
@@ -596,11 +843,13 @@ syscall_test(
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_unix_unbound_abstract_test",
+ vfs2 = "True",
)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:socket_unix_unbound_dgram_test",
+ vfs2 = "True",
)
syscall_test(
@@ -612,6 +861,7 @@ syscall_test(
size = "medium",
shard_count = 10,
test = "//test/syscalls/linux:socket_unix_unbound_seqpacket_test",
+ vfs2 = "True",
)
syscall_test(
@@ -623,6 +873,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:statfs_test",
+ vfs2 = "True",
)
syscall_test(
@@ -633,6 +884,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:stat_times_test",
+ vfs2 = "True",
)
syscall_test(
@@ -648,6 +900,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:sync_test",
+ vfs2 = "True",
)
syscall_test(
@@ -655,86 +908,151 @@ syscall_test(
test = "//test/syscalls/linux:sync_file_range_test",
)
-syscall_test(test = "//test/syscalls/linux:sysinfo_test")
+syscall_test(
+ test = "//test/syscalls/linux:sysinfo_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:syslog_test")
+syscall_test(
+ test = "//test/syscalls/linux:syslog_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:sysret_test")
+syscall_test(
+ test = "//test/syscalls/linux:sysret_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
shard_count = 10,
test = "//test/syscalls/linux:tcp_socket_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:tgkill_test")
+syscall_test(
+ test = "//test/syscalls/linux:tgkill_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:timerfd_test")
+syscall_test(
+ test = "//test/syscalls/linux:timerfd_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:timers_test")
+syscall_test(
+ test = "//test/syscalls/linux:timers_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:time_test")
+syscall_test(
+ test = "//test/syscalls/linux:time_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:tkill_test")
+syscall_test(
+ test = "//test/syscalls/linux:tkill_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:truncate_test",
)
-syscall_test(test = "//test/syscalls/linux:tuntap_test")
+syscall_test(
+ test = "//test/syscalls/linux:tuntap_test",
+)
syscall_test(
add_hostinet = True,
test = "//test/syscalls/linux:tuntap_hostinet_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:udp_bind_test")
+syscall_test(
+ test = "//test/syscalls/linux:udp_bind_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
add_hostinet = True,
shard_count = 10,
test = "//test/syscalls/linux:udp_socket_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:uidgid_test")
+syscall_test(
+ test = "//test/syscalls/linux:uidgid_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:uname_test")
+syscall_test(
+ test = "//test/syscalls/linux:uname_test",
+ vfs2 = "True",
+)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:unlink_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:unshare_test")
+syscall_test(
+ test = "//test/syscalls/linux:unshare_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:utimes_test")
+syscall_test(
+ test = "//test/syscalls/linux:utimes_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
test = "//test/syscalls/linux:vdso_clock_gettime_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:vdso_test")
+syscall_test(
+ test = "//test/syscalls/linux:vdso_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:vsyscall_test")
+syscall_test(
+ test = "//test/syscalls/linux:vsyscall_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:vfork_test")
+syscall_test(
+ test = "//test/syscalls/linux:vfork_test",
+ vfs2 = "True",
+)
syscall_test(
size = "medium",
shard_count = 5,
test = "//test/syscalls/linux:wait_test",
+ vfs2 = "True",
)
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:write_test",
+ vfs2 = "True",
)
-syscall_test(test = "//test/syscalls/linux:proc_net_unix_test")
+syscall_test(
+ test = "//test/syscalls/linux:proc_net_unix_test",
+)
-syscall_test(test = "//test/syscalls/linux:proc_net_tcp_test")
+syscall_test(
+ test = "//test/syscalls/linux:proc_net_tcp_test",
+ vfs2 = "True",
+)
-syscall_test(test = "//test/syscalls/linux:proc_net_udp_test")
+syscall_test(
+ test = "//test/syscalls/linux:proc_net_udp_test",
+ vfs2 = "True",
+)
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 5acdb8438..f4b5de18d 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -951,6 +951,7 @@ cc_binary(
"//test/util:epoll_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
@@ -1382,7 +1383,7 @@ cc_binary(
srcs = ["partial_bad_buffer.cc"],
linkstatic = 1,
deps = [
- "//test/syscalls/linux:socket_test_util",
+ ":socket_test_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
"@com_google_absl//absl/time",
@@ -3461,7 +3462,7 @@ cc_binary(
deps = [
":socket_test_util",
gtest,
- "//test/syscalls/linux:socket_netlink_route_util",
+ ":socket_netlink_route_util",
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
diff --git a/test/syscalls/linux/accept_bind.cc b/test/syscalls/linux/accept_bind.cc
index e08c578f0..f65a14fb8 100644
--- a/test/syscalls/linux/accept_bind.cc
+++ b/test/syscalls/linux/accept_bind.cc
@@ -13,6 +13,7 @@
// limitations under the License.
#include <stdio.h>
+#include <sys/socket.h>
#include <sys/un.h>
#include <algorithm>
@@ -141,6 +142,47 @@ TEST_P(AllSocketPairTest, Connect) {
SyscallSucceeds());
}
+TEST_P(AllSocketPairTest, ConnectWithWrongType) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int type;
+ socklen_t typelen = sizeof(type);
+ EXPECT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_TYPE, &type, &typelen),
+ SyscallSucceeds());
+ switch (type) {
+ case SOCK_STREAM:
+ type = SOCK_SEQPACKET;
+ break;
+ case SOCK_SEQPACKET:
+ type = SOCK_STREAM;
+ break;
+ }
+
+ const FileDescriptor another_socket =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, type, 0));
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ if (sockets->first_addr()->sa_data[0] != 0) {
+ ASSERT_THAT(connect(another_socket.get(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallFailsWithErrno(EPROTOTYPE));
+ } else {
+ ASSERT_THAT(connect(another_socket.get(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallFailsWithErrno(ECONNREFUSED));
+ }
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+}
+
TEST_P(AllSocketPairTest, ConnectNonListening) {
auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc
index 0e13ad190..1d1a7171d 100644
--- a/test/syscalls/linux/inotify.cc
+++ b/test/syscalls/linux/inotify.cc
@@ -19,6 +19,7 @@
#include <sys/inotify.h>
#include <sys/ioctl.h>
#include <sys/time.h>
+#include <sys/xattr.h>
#include <atomic>
#include <list>
@@ -33,6 +34,7 @@
#include "test/util/epoll_util.h"
#include "test/util/file_descriptor.h"
#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"
#include "test/util/thread_util.h"
@@ -335,6 +337,11 @@ TEST(Inotify, InotifyFdNotWritable) {
EXPECT_THAT(write(fd.get(), "x", 1), SyscallFailsWithErrno(EBADF));
}
+TEST(Inotify, InitFlags) {
+ EXPECT_THAT(inotify_init1(IN_NONBLOCK | IN_CLOEXEC), SyscallSucceeds());
+ EXPECT_THAT(inotify_init1(12345), SyscallFailsWithErrno(EINVAL));
+}
+
TEST(Inotify, NonBlockingReadReturnsEagain) {
const FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
@@ -395,7 +402,7 @@ TEST(Inotify, CanDeleteFileAfterRemovingWatch) {
file1.reset();
}
-TEST(Inotify, CanRemoveWatchAfterDeletingFile) {
+TEST(Inotify, RemoveWatchAfterDeletingFileFails) {
const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
TempPath file1 =
ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
@@ -491,17 +498,23 @@ TEST(Inotify, DeletingChildGeneratesEvents) {
Event(IN_DELETE, root_wd, Basename(file1_path))}));
}
+// Creating a file in "parent/child" should generate events for child, but not
+// parent.
TEST(Inotify, CreatingFileGeneratesEvents) {
- const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath child =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path()));
const FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), parent.path(), IN_ALL_EVENTS));
const int wd = ASSERT_NO_ERRNO_AND_VALUE(
- InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ InotifyAddWatch(fd.get(), child.path(), IN_ALL_EVENTS));
// Create a new file in the directory.
const TempPath file1 =
- ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(child.path()));
const std::vector<Event> events =
ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
@@ -554,6 +567,47 @@ TEST(Inotify, WritingFileGeneratesModifyEvent) {
ASSERT_THAT(events, Are({Event(IN_MODIFY, wd, Basename(file1.path()))}));
}
+TEST(Inotify, SizeZeroReadWriteGeneratesNothing) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ // Read from the empty file.
+ int val;
+ ASSERT_THAT(read(file1_fd.get(), &val, sizeof(val)),
+ SyscallSucceedsWithValue(0));
+
+ // Write zero bytes.
+ ASSERT_THAT(write(file1_fd.get(), "", 0), SyscallSucceedsWithValue(0));
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({}));
+}
+
+TEST(Inotify, FailedFileCreationGeneratesNoEvents) {
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const std::string dir_path = dir.path();
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(fd.get(), dir_path, IN_ALL_EVENTS));
+
+ const char* p = dir_path.c_str();
+ ASSERT_THAT(mkdir(p, 0777), SyscallFails());
+ ASSERT_THAT(mknod(p, S_IFIFO, 0777), SyscallFails());
+ ASSERT_THAT(symlink(p, p), SyscallFails());
+ ASSERT_THAT(link(p, p), SyscallFails());
+ std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({}));
+}
+
TEST(Inotify, WatchSetAfterOpenReportsCloseFdEvent) {
const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
const FileDescriptor fd =
@@ -602,7 +656,7 @@ TEST(Inotify, ChildrenDeletionInWatchedDirGeneratesEvent) {
Event(IN_DELETE | IN_ISDIR, wd, Basename(dir1_path))}));
}
-TEST(Inotify, WatchTargetDeletionGeneratesEvent) {
+TEST(Inotify, RmdirOnWatchedTargetGeneratesEvent) {
const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
const FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
@@ -1228,7 +1282,7 @@ TEST(Inotify, LinkGeneratesAttribAndCreateEvents) {
InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
const int rc = link(file1.path().c_str(), link1.path().c_str());
- // link(2) is only supported on tmpfs in the sandbox.
+ // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox.
SKIP_IF(IsRunningOnGvisor() && rc != 0 &&
(errno == EPERM || errno == ENOENT));
ASSERT_THAT(rc, SyscallSucceeds());
@@ -1322,21 +1376,27 @@ TEST(Inotify, HardlinksReuseSameWatch) {
Event(IN_DELETE, root_wd, Basename(file1_path))}));
}
+// Calling mkdir within "parent/child" should generate an event for child, but
+// not parent.
TEST(Inotify, MkdirGeneratesCreateEventWithDirFlag) {
- const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath child =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path()));
const FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
- const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
- InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), parent.path(), IN_ALL_EVENTS));
+ const int child_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), child.path(), IN_ALL_EVENTS));
- const TempPath dir1(NewTempAbsPathInDir(root.path()));
+ const TempPath dir1(NewTempAbsPathInDir(child.path()));
ASSERT_THAT(mkdir(dir1.path().c_str(), 0777), SyscallSucceeds());
const std::vector<Event> events =
ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
ASSERT_THAT(
events,
- Are({Event(IN_CREATE | IN_ISDIR, root_wd, Basename(dir1.path()))}));
+ Are({Event(IN_CREATE | IN_ISDIR, child_wd, Basename(dir1.path()))}));
}
TEST(Inotify, MultipleInotifyInstancesAndWatchesAllGetEvents) {
@@ -1596,7 +1656,44 @@ TEST(Inotify, EpollNoDeadlock) {
}
}
-TEST(Inotify, SpliceEvent) {
+// On Linux, inotify behavior is not very consistent with splice(2). We try our
+// best to emulate Linux for very basic calls to splice.
+TEST(Inotify, SpliceOnWatchTarget) {
+ int pipes[2];
+ ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds());
+
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor inotify_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ dir.path(), "some content", TempPath::kDefaultFileMode));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+ const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(inotify_fd.get(), dir.path(), IN_ALL_EVENTS));
+ const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS));
+
+ EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr, 1, /*flags=*/0),
+ SyscallSucceedsWithValue(1));
+
+ // Surprisingly, events are not generated in Linux if we read from a file.
+ std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ ASSERT_THAT(events, Are({}));
+
+ EXPECT_THAT(splice(pipes[0], nullptr, fd.get(), nullptr, 1, /*flags=*/0),
+ SyscallSucceedsWithValue(1));
+
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ ASSERT_THAT(events, Are({
+ Event(IN_MODIFY, dir_wd, Basename(file.path())),
+ Event(IN_MODIFY, file_wd),
+ }));
+}
+
+TEST(Inotify, SpliceOnInotifyFD) {
int pipes[2];
ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds());
@@ -1624,6 +1721,315 @@ TEST(Inotify, SpliceEvent) {
ASSERT_THAT(events, Are({Event(IN_ACCESS, watcher)}));
}
+// Watches on a parent should not be triggered by actions on a hard link to one
+// of its children that has a different parent.
+TEST(Inotify, LinkOnOtherParent) {
+ const TempPath dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+ std::string link_path = NewTempAbsPathInDir(dir2.path());
+
+ const int rc = link(file.path().c_str(), link_path.c_str());
+ // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox.
+ SKIP_IF(IsRunningOnGvisor() && rc != 0 &&
+ (errno == EPERM || errno == ENOENT));
+ ASSERT_THAT(rc, SyscallSucceeds());
+
+ const FileDescriptor inotify_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(inotify_fd.get(), dir1.path(), IN_ALL_EVENTS));
+
+ // Perform various actions on the link outside of dir1, which should trigger
+ // no inotify events.
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(link_path.c_str(), O_RDWR));
+ int val = 0;
+ ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+ ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+ ASSERT_THAT(ftruncate(fd.get(), 12345), SyscallSucceeds());
+ ASSERT_THAT(unlink(link_path.c_str()), SyscallSucceeds());
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({}));
+}
+
+TEST(Inotify, Xattr) {
+ // TODO(gvisor.dev/issue/1636): Support extended attributes in runsc gofer.
+ SKIP_IF(IsRunningOnGvisor());
+
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const std::string path = file.path();
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_RDWR));
+ const FileDescriptor inotify_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(inotify_fd.get(), path, IN_ALL_EVENTS));
+
+ const char* cpath = path.c_str();
+ const char* name = "user.test";
+ int val = 123;
+ ASSERT_THAT(setxattr(cpath, name, &val, sizeof(val), /*flags=*/0),
+ SyscallSucceeds());
+ std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)}));
+
+ ASSERT_THAT(getxattr(cpath, name, &val, sizeof(val)), SyscallSucceeds());
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({}));
+
+ char list[100];
+ ASSERT_THAT(listxattr(cpath, list, sizeof(list)), SyscallSucceeds());
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({}));
+
+ ASSERT_THAT(removexattr(cpath, name), SyscallSucceeds());
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)}));
+
+ ASSERT_THAT(fsetxattr(fd.get(), name, &val, sizeof(val), /*flags=*/0),
+ SyscallSucceeds());
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)}));
+
+ ASSERT_THAT(fgetxattr(fd.get(), name, &val, sizeof(val)), SyscallSucceeds());
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({}));
+
+ ASSERT_THAT(flistxattr(fd.get(), list, sizeof(list)), SyscallSucceeds());
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({}));
+
+ ASSERT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds());
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)}));
+}
+
+TEST(Inotify, Exec) {
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath bin = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo(dir.path(), "/bin/true"));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), bin.path(), IN_ALL_EVENTS));
+
+ // Perform exec.
+ ScopedThread t([&bin]() {
+ ASSERT_THAT(execl(bin.path().c_str(), bin.path().c_str(), (char*)nullptr),
+ SyscallSucceeds());
+ });
+ t.Join();
+
+ std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_OPEN, wd), Event(IN_ACCESS, wd)}));
+}
+
+// Watches without IN_EXCL_UNLINK, should continue to emit events for file
+// descriptors after their corresponding files have been unlinked.
+//
+// We need to disable S/R because there are filesystems where we cannot re-open
+// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
+TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) {
+ const DisableSave ds;
+
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateFileWith(dir.path(), "123", TempPath::kDefaultFileMode));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+ const FileDescriptor inotify_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(inotify_fd.get(), dir.path(), IN_ALL_EVENTS));
+ const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS));
+
+ ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds());
+ int val = 0;
+ ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+ ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({
+ Event(IN_ATTRIB, file_wd),
+ Event(IN_DELETE, dir_wd, Basename(file.path())),
+ Event(IN_ACCESS, dir_wd, Basename(file.path())),
+ Event(IN_ACCESS, file_wd),
+ Event(IN_MODIFY, dir_wd, Basename(file.path())),
+ Event(IN_MODIFY, file_wd),
+ }));
+}
+
+// Watches created with IN_EXCL_UNLINK will stop emitting events on fds for
+// children that have already been unlinked.
+//
+// We need to disable S/R because there are filesystems where we cannot re-open
+// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
+TEST(Inotify, ExcludeUnlink_NoRandomSave) {
+ const DisableSave ds;
+ // TODO(gvisor.dev/issue/1624): This test fails on VFS1.
+ SKIP_IF(IsRunningWithVFS1());
+
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+ const FileDescriptor inotify_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+
+ // Unlink the child, which should cause further operations on the open file
+ // descriptor to be ignored.
+ ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds());
+ int val = 0;
+ ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+ ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_DELETE, wd, Basename(file.path()))}));
+}
+
+// We need to disable S/R because there are filesystems where we cannot re-open
+// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
+TEST(Inotify, ExcludeUnlinkDirectory_NoRandomSave) {
+ const DisableSave ds;
+
+ const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ TempPath dir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path()));
+ std::string dirPath = dir.path();
+ const FileDescriptor inotify_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(dirPath.c_str(), O_RDONLY | O_DIRECTORY));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ inotify_fd.get(), parent.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+
+ // Unlink the dir, and then close the open fd.
+ ASSERT_THAT(rmdir(dirPath.c_str()), SyscallSucceeds());
+ dir.reset();
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ // No close event should appear.
+ ASSERT_THAT(events,
+ Are({Event(IN_DELETE | IN_ISDIR, wd, Basename(dirPath))}));
+}
+
+// If "dir/child" and "dir/child2" are links to the same file, and "dir/child"
+// is unlinked, a watch on "dir" with IN_EXCL_UNLINK will exclude future events
+// for fds on "dir/child" but not "dir/child2".
+//
+// We need to disable S/R because there are filesystems where we cannot re-open
+// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
+TEST(Inotify, ExcludeUnlinkMultipleChildren_NoRandomSave) {
+ const DisableSave ds;
+ // TODO(gvisor.dev/issue/1624): This test fails on VFS1.
+ SKIP_IF(IsRunningWithVFS1());
+
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+ std::string path1 = file.path();
+ std::string path2 = NewTempAbsPathInDir(dir.path());
+
+ const int rc = link(path1.c_str(), path2.c_str());
+ // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox.
+ SKIP_IF(IsRunningOnGvisor() && rc != 0 &&
+ (errno == EPERM || errno == ENOENT));
+ ASSERT_THAT(rc, SyscallSucceeds());
+ const FileDescriptor fd1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(path1.c_str(), O_RDWR));
+ const FileDescriptor fd2 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(path2.c_str(), O_RDWR));
+
+ const FileDescriptor inotify_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+
+ // After unlinking path1, only events on the fd for path2 should be generated.
+ ASSERT_THAT(unlink(path1.c_str()), SyscallSucceeds());
+ ASSERT_THAT(write(fd1.get(), "x", 1), SyscallSucceeds());
+ ASSERT_THAT(write(fd2.get(), "x", 1), SyscallSucceeds());
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({
+ Event(IN_DELETE, wd, Basename(path1)),
+ Event(IN_MODIFY, wd, Basename(path2)),
+ }));
+}
+
+// On native Linux, actions of data type FSNOTIFY_EVENT_INODE are not affected
+// by IN_EXCL_UNLINK (see
+// fs/notify/inotify/inotify_fsnotify.c:inotify_handle_event). Inode-level
+// events include changes to metadata and extended attributes.
+//
+// We need to disable S/R because there are filesystems where we cannot re-open
+// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
+TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) {
+ const DisableSave ds;
+
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_RDWR));
+ const FileDescriptor inotify_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+
+ // NOTE(b/157163751): Create another link before unlinking. This is needed for
+ // the gofer filesystem in gVisor, where open fds will not work once the link
+ // count hits zero. In VFS2, we end up skipping the gofer test anyway, because
+ // hard links are not supported for gofer fs.
+ if (IsRunningOnGvisor()) {
+ std::string link_path = NewTempAbsPath();
+ const int rc = link(file.path().c_str(), link_path.c_str());
+ // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox.
+ SKIP_IF(rc != 0 && (errno == EPERM || errno == ENOENT));
+ ASSERT_THAT(rc, SyscallSucceeds());
+ }
+
+ // Even after unlinking, inode-level operations will trigger events regardless
+ // of IN_EXCL_UNLINK.
+ ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds());
+
+ // Perform various actions on fd.
+ ASSERT_THAT(ftruncate(fd.get(), 12345), SyscallSucceeds());
+ std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({
+ Event(IN_DELETE, wd, Basename(file.path())),
+ Event(IN_MODIFY, wd, Basename(file.path())),
+ }));
+
+ struct timeval times[2] = {{1, 0}, {2, 0}};
+ ASSERT_THAT(futimes(fd.get(), times), SyscallSucceeds());
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file.path()))}));
+
+ // S/R is disabled on this entire test due to behavior with unlink; it must
+ // also be disabled after this point because of fchmod.
+ ASSERT_THAT(fchmod(fd.get(), 0777), SyscallSucceeds());
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file.path()))}));
+}
+
} // namespace
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc
index 640fe6bfc..670c0284b 100644
--- a/test/syscalls/linux/open.cc
+++ b/test/syscalls/linux/open.cc
@@ -416,6 +416,29 @@ TEST_F(OpenTest, CanTruncateWriteOnlyNoReadPermission_NoRandomSave) {
EXPECT_EQ(stat.st_size, 0);
}
+TEST_F(OpenTest, CanTruncateWithStrangePermissions) {
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+ const DisableSave ds; // Permissions are dropped.
+ std::string path = NewTempAbsPath();
+ int fd;
+ // Create a file without user permissions.
+ EXPECT_THAT( // SAVE_BELOW
+ fd = open(path.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 055),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ // Cannot open file because we are owner and have no permissions set.
+ EXPECT_THAT(open(path.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES));
+
+ // We *can* chmod the file, because we are the owner.
+ EXPECT_THAT(chmod(path.c_str(), 0755), SyscallSucceeds());
+
+ // Now we can open the file again.
+ EXPECT_THAT(fd = open(path.c_str(), O_RDWR), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
} // namespace
} // namespace testing
diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc
index b8a0159ba..aabfa6955 100644
--- a/test/syscalls/linux/pty.cc
+++ b/test/syscalls/linux/pty.cc
@@ -364,6 +364,12 @@ PosixErrorOr<size_t> PollAndReadFd(int fd, void* buf, size_t count,
ssize_t n =
ReadFd(fd, static_cast<char*>(buf) + completed, count - completed);
if (n < 0) {
+ if (errno == EAGAIN) {
+ // Linux sometimes returns EAGAIN from this read, despite the fact that
+ // poll returned success. Let's just do what do as we are told and try
+ // again.
+ continue;
+ }
return PosixError(errno, "read failed");
}
completed += n;
diff --git a/test/syscalls/linux/socket_unix.cc b/test/syscalls/linux/socket_unix.cc
index 8bf663e8b..591cab3fd 100644
--- a/test/syscalls/linux/socket_unix.cc
+++ b/test/syscalls/linux/socket_unix.cc
@@ -256,10 +256,9 @@ TEST_P(UnixSocketPairTest, ShutdownWrite) {
}
TEST_P(UnixSocketPairTest, SocketReopenFromProcfs) {
- // TODO(b/122310852): We should be returning ENXIO and NOT EIO.
- // TODO(github.dev/issue/1624): This should be resolved in VFS2. Verify
- // that this is the case and delete the SKIP_IF once we delete VFS1.
- SKIP_IF(IsRunningOnGvisor());
+ // TODO(gvisor.dev/issue/1624): In VFS1, we return EIO instead of ENXIO (see
+ // b/122310852). Remove this skip once VFS1 is deleted.
+ SKIP_IF(IsRunningWithVFS1());
auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
// Opening a socket pair via /proc/self/fd/X is a ENXIO.
diff --git a/tools/bazel.mk b/tools/bazel.mk
index 7cb6e393b..9f4a40669 100644
--- a/tools/bazel.mk
+++ b/tools/bazel.mk
@@ -21,7 +21,8 @@ BRANCH_NAME := $(shell (git branch --show-current 2>/dev/null || \
# Bazel container configuration (see below).
USER ?= gvisor
-DOCKER_NAME ?= gvisor-bazel-$(shell readlink -m $(CURDIR) | md5sum | cut -c1-8)
+HASH ?= $(shell readlink -m $(CURDIR) | md5sum | cut -c1-8)
+DOCKER_NAME ?= gvisor-bazel-$(HASH)
DOCKER_PRIVILEGED ?= --privileged
BAZEL_CACHE := $(shell readlink -m ~/.cache/bazel/)
GCLOUD_CONFIG := $(shell readlink -m ~/.config/gcloud/)
@@ -40,6 +41,7 @@ FULL_DOCKER_RUN_OPTIONS += -v "$(DOCKER_SOCKET):$(DOCKER_SOCKET)"
DOCKER_GROUP := $(shell stat -c '%g' $(DOCKER_SOCKET))
ifneq ($(GID),$(DOCKER_GROUP))
USERADD_OPTIONS += --groups $(DOCKER_GROUP)
+GROUPADD_DOCKER += groupadd --gid $(DOCKER_GROUP) --non-unique docker-$(HASH) &&
FULL_DOCKER_RUN_OPTIONS += --group-add $(DOCKER_GROUP)
endif
endif
@@ -71,10 +73,12 @@ bazel-server-start: load-default ## Starts the bazel server.
$(FULL_DOCKER_RUN_OPTIONS) \
gvisor.dev/images/default \
sh -c "groupadd --gid $(GID) --non-unique $(USER) && \
+ $(GROUPADD_DOCKER) \
useradd --uid $(UID) --non-unique --no-create-home --gid $(GID) $(USERADD_OPTIONS) -d $(HOME) $(USER) && \
bazel version && \
exec tail --pid=\$$(bazel info server_pid) -f /dev/null"
- @while :; do if docker logs $(DOCKER_NAME) 2>/dev/null | grep "Build label:" >/dev/null; then break; fi; sleep 1; done
+ @while :; do if docker logs $(DOCKER_NAME) 2>/dev/null | grep "Build label:" >/dev/null; then break; fi; \
+ if ! docker ps | grep $(DOCKER_NAME); then exit 1; else sleep 1; fi; done
.PHONY: bazel-server-start
bazel-shutdown: ## Shuts down a running bazel server.
@@ -89,14 +93,16 @@ bazel-server: ## Ensures that the server exists. Used as an internal target.
@docker exec $(DOCKER_NAME) true || $(MAKE) bazel-server-start
.PHONY: bazel-server
-build_paths = docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) sh -o pipefail -c 'bazel build $(OPTIONS) $(TARGETS) 2>&1 \
- | tee /dev/fd/2 \
+build_cmd = docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) sh -o pipefail -c 'bazel $(STARTUP_OPTIONS) build $(OPTIONS) $(TARGETS)'
+
+build_paths = $(build_cmd) 2>&1 \
+ | tee /proc/self/fd/2 \
| grep -E "^ bazel-bin/" \
- | awk "{print $$1;}"' \
+ | awk "{print $$1;}" \
| xargs -n 1 -I {} sh -c "$(1)"
build: bazel-server
- @$(call build_paths,echo {})
+ @$(call build_cmd)
.PHONY: build
copy: bazel-server
@@ -114,5 +120,5 @@ sudo: bazel-server
.PHONY: sudo
test: bazel-server
- @docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) bazel test $(OPTIONS) $(TARGETS)
+ @docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) bazel $(STARTUP_OPTIONS) test $(OPTIONS) $(TARGETS)
.PHONY: test
diff --git a/tools/go_branch.sh b/tools/go_branch.sh
index e568a0a76..093de89b4 100755
--- a/tools/go_branch.sh
+++ b/tools/go_branch.sh
@@ -88,6 +88,12 @@ EOF
# because they may correspond to unused templates, etc.
cp "${repo_orig}"/runsc/*.go runsc/
+# Normalize all permissions. The way bazel constructs the :gopath tree may leave
+# some strange permissions on files. We don't have anything in this tree that
+# should be execution, only the Go source files, README.md, and ${othersrc}.
+find . -type f -exec chmod 0644 {} \;
+find . -type d -exec chmod 0755 {} \;
+
# Update the current working set and commit.
git add . && git commit -m "Merge ${head} (automated)"
diff --git a/tools/make_repository.sh b/tools/make_apt.sh
index 32d7b3b1f..3fb1066e5 100755
--- a/tools/make_repository.sh
+++ b/tools/make_apt.sh
@@ -14,22 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# We need to be sure that only a repo path is printed on stdout.
-exec 50<&1
-exec 1<&2
-
-echo_stdout() {
- echo "$@" >&50
-}
-
-# Parse arguments. We require more than two arguments, which are the private
-# keyring, the e-mail associated with the signer, and the list of packages.
-if [ "$#" -le 3 ]; then
- echo "usage: $0 <private-key> <signer-email> <root> <packages...>"
+if [[ "$#" -le 3 ]]; then
+ echo "usage: $0 <private-key> <suite> <root> <packages...>"
exit 1
fi
declare -r private_key=$(readlink -e "$1"); shift
-declare -r signer="$1"; shift
+declare -r suite="$1"; shift
declare -r root="$1"; shift
# Ensure that we have the correct packages installed.
@@ -52,16 +42,16 @@ function apt_install() {
esac
done
}
-dpkg-sig --help >/dev/null || apt_install dpkg-sig
-apt-ftparchive --help >/dev/null || apt_install apt-utils
-xz --help >/dev/null || apt_install xz-utils
+dpkg-sig --help >/dev/null 2>&1 || apt_install dpkg-sig
+apt-ftparchive --help >/dev/null 2>&1 || apt_install apt-utils
+xz --help >/dev/null 2>&1 || apt_install xz-utils
# Verbose from this point.
set -xeo pipefail
-# Create a temporary working directory. We don't remove this, as we ultimately
-# print this result and allow the caller to copy wherever they would like.
-declare -r tmpdir=$(mktemp -d /tmp/repoXXXXXX)
+# Create a directory for the release.
+declare -r release="${root}/dists/${suite}"
+mkdir -p "${release}"
# Create a temporary keyring, and ensure it is cleaned up.
declare -r keyring=$(mktemp /tmp/keyringXXXXXX.gpg)
@@ -69,12 +59,18 @@ cleanup() {
rm -f "${keyring}"
}
trap cleanup EXIT
-gpg --no-default-keyring --keyring "${keyring}" --import "${private_key}"
+
+# We attempt the import twice because the first one will fail if the public key
+# is not found. This isn't actually a failure for us, because we don't require
+# the public (this may be stored separately). The second import will succeed
+# because, in reality, the first import succeeded and it's a no-op.
+gpg --no-default-keyring --keyring "${keyring}" --import "${private_key}" || \
+ gpg --no-default-keyring --keyring "${keyring}" --import "${private_key}"
# Copy the packages into the root.
for pkg in "$@"; do
- name=$(basename "${pkg}" .deb)
- name=$(basename "${name}" .changes)
+ ext=${pkg##*.}
+ name=$(basename "${pkg}" ".${ext}")
arch=${name##*_}
if [[ "${name}" == "${arch}" ]]; then
continue # Not a regular package.
@@ -90,17 +86,22 @@ for pkg in "$@"; do
echo "Unknown file type: ${pkg}"
exit 1
fi
- version=${version// /} # Trim whitespace.
- mkdir -p "${root}"/pool/"${version}"/binary-"${arch}"
- cp -a "${pkg}" "${root}"/pool/"${version}"/binary-"${arch}"
-done
-# Ensure all permissions are correct.
-find "${root}"/pool -type f -exec chmod 0644 {} \;
+ # The package may already exist, in which case we leave it alone.
+ version=${version// /} # Trim whitespace.
+ destdir="${root}/pool/${version}/binary-${arch}"
+ target="${destdir}/${name}.${ext}"
+ if [[ -f "${target}" ]]; then
+ continue
+ fi
-# Sign all packages.
-for file in "${root}"/pool/*/binary-*/*.deb; do
- dpkg-sig -g "--no-default-keyring --keyring ${keyring}" --sign builder "${file}"
+ # Copy & sign the package.
+ mkdir -p "${destdir}"
+ cp -a "${pkg}" "${target}"
+ chmod 0644 "${target}"
+ if [[ "${ext}" == "deb" ]]; then
+ dpkg-sig -g "--no-default-keyring --keyring ${keyring}" --sign builder "${target}"
+ fi
done
# Build the package list.
@@ -109,7 +110,7 @@ for dir in "${root}"/pool/*/binary-*; do
name=$(basename "${dir}")
arch=${name##binary-}
arches+=("${arch}")
- repo_packages="${tmpdir}"/main/"${name}"
+ repo_packages="${release}"/main/"${name}"
mkdir -p "${repo_packages}"
(cd "${root}" && apt-ftparchive --arch "${arch}" packages pool > "${repo_packages}"/Packages)
(cd "${repo_packages}" && cat Packages | gzip > Packages.gz)
@@ -117,23 +118,22 @@ for dir in "${root}"/pool/*/binary-*; do
done
# Build the release list.
-cat > "${tmpdir}"/apt.conf <<EOF
+cat > "${release}"/apt.conf <<EOF
APT {
FTPArchive {
Release {
Architectures "${arches[@]}";
+ Suite "${suite}";
Components "main";
};
};
};
EOF
-(cd "${tmpdir}" && apt-ftparchive -c=apt.conf release . > Release)
-rm "${tmpdir}"/apt.conf
+(cd "${release}" && apt-ftparchive -c=apt.conf release . > Release)
+rm "${release}"/apt.conf
# Sign the release.
declare -r digest_opts=("--digest-algo" "SHA512" "--cert-digest-algo" "SHA512")
-(cd "${tmpdir}" && gpg --no-default-keyring --keyring "${keyring}" --clearsign "${digest_opts[@]}" -o InRelease Release)
-(cd "${tmpdir}" && gpg --no-default-keyring --keyring "${keyring}" -abs "${digest_opts[@]}" -o Release.gpg Release)
-
-# Show the results.
-echo_stdout "${tmpdir}"
+(cd "${release}" && rm -f Release.gpg InRelease)
+(cd "${release}" && gpg --no-default-keyring --keyring "${keyring}" --clearsign "${digest_opts[@]}" -o InRelease Release)
+(cd "${release}" && gpg --no-default-keyring --keyring "${keyring}" -abs "${digest_opts[@]}" -o Release.gpg Release)
diff --git a/tools/make_release.sh b/tools/make_release.sh
new file mode 100755
index 000000000..b1cdd47b0
--- /dev/null
+++ b/tools/make_release.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Copyright 2018 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [[ "$#" -le 2 ]]; then
+ echo "usage: $0 <private-key> <root> <binaries & packages...>"
+ echo "The environment variable NIGHTLY may be set to control"
+ echo "whether the nightly packages are produced or not."
+ exit 1
+fi
+
+set -xeo pipefail
+declare -r private_key="$1"; shift
+declare -r root="$1"; shift
+declare -a binaries
+declare -a pkgs
+
+# Collect binaries & packages.
+for arg in "$@"; do
+ if [[ "${arg}" == *.deb ]] || [[ "${arg}" == *.changes ]]; then
+ pkgs+=("${arg}")
+ else
+ binaries+=("${arg}")
+ fi
+done
+
+# install_raw installs raw artifacts.
+install_raw() {
+ mkdir -p "${root}/$1"
+ for binary in "${binaries[@]}"; do
+ # Copy the raw file & generate a sha512sum.
+ name=$(basename "${binary}")
+ cp -f "${binary}" "${root}/$1"
+ sha512sum "${root}/$1/${name}" | \
+ awk "{print $$1 \" ${name}\"}" > "${root}/$1/${name}.sha512"
+ done
+}
+
+# install_apt installs an apt repository.
+install_apt() {
+ tools/make_apt.sh "${private_key}" "$1" "${root}" "${pkgs[@]}"
+}
+
+# If nightly, install only nightly artifacts.
+if [[ "${NIGHTLY:-false}" == "true" ]]; then
+ # The "latest" directory and current date.
+ stamp="$(date -Idate)"
+ install_raw "nightly/latest"
+ install_raw "nightly/${stamp}"
+ install_apt "nightly"
+else
+ # Is it a tagged release? Build that.
+ tags="$(git tag --points-at HEAD 2>/dev/null || true)"
+ if ! [[ -z "${tags}" ]]; then
+ # Note that a given commit can match any number of tags. We have to iterate
+ # through all possible tags and produce associated artifacts.
+ for tag in ${tags}; do
+ name=$(echo "${tag}" | cut -d'-' -f2)
+ base=$(echo "${name}" | cut -d'.' -f1)
+ install_raw "release/${name}"
+ install_raw "release/latest"
+ install_apt "release"
+ install_apt "${base}"
+ done
+ else
+ # Otherwise, assume it is a raw master commit.
+ install_raw "master/latest"
+ install_apt "master"
+ fi
+fi
diff --git a/tools/tag_release.sh b/tools/tag_release.sh
index 4dbfe420a..b0bab74b4 100755
--- a/tools/tag_release.sh
+++ b/tools/tag_release.sh
@@ -18,10 +18,10 @@
# validate a provided release name, create a tag and push it. It must be
# run manually when a release is created.
-set -xeu
+set -xeuo pipefail
# Check arguments.
-if [ "$#" -ne 3 ]; then
+if [[ "$#" -ne 3 ]]; then
echo "usage: $0 <commit|revid> <release.rc> <message-file>"
exit 1
fi
@@ -30,6 +30,12 @@ declare -r target_commit="$1"
declare -r release="$2"
declare -r message_file="$3"
+if [[ -z "${target_commit}" ]]; then
+ echo "error: <commit|revid> is empty."
+fi
+if [[ -z "${release}" ]]; then
+ echo "error: <release.rc> is empty."
+fi
if ! [[ -r "${message_file}" ]]; then
echo "error: message file '${message_file}' is not readable."
exit 1
@@ -68,8 +74,9 @@ if ! [[ "${release}" =~ ^20[0-9]{6}\.[0-9]+$ ]]; then
exit 1
fi
-# Tag the given commit (annotated, to record the committer).
+# Tag the given commit (annotated, to record the committer). Note that the tag
+# here is applied as a force, in case the tag already exists and is the same.
+# The push will fail in this case (because it is not forced).
declare -r tag="release-${release}"
-(git tag -F "${message_file}" -a "${tag}" "${commit}" && \
- git push origin tag "${tag}") || \
- (git tag -d "${tag}" && false)
+git tag -f -F "${message_file}" -a "${tag}" "${commit}" && \
+ git push origin tag "${tag}"
diff --git a/website/_config.yml b/website/_config.yml
index 3241e458c..b08602970 100644
--- a/website/_config.yml
+++ b/website/_config.yml
@@ -12,6 +12,7 @@ plugins:
- jekyll-inline-svg
- jekyll-relative-links
- jekyll-feed
+ - jekyll-sitemap
site_url: https://gvisor.dev
feed:
path: blog/index.xml